From 33ea11cf05c31d80a1445edeb657b80b61837d36 Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Fri, 15 May 2026 14:22:46 +0200 Subject: [PATCH 01/19] runtime draft --- .../runtime-system/implementation_plan.md | 1146 +++++++++++++++++ examples/runtime/inference_async.py | 764 +++++++++++ examples/runtime/inference_pi05_gpu.ipynb | 838 ++++++++++++ src/physicalai/runtime/__init__.py | 43 + src/physicalai/runtime/_action_queue.py | 72 ++ src/physicalai/runtime/execution.py | 250 ++++ src/physicalai/runtime/runtime.py | 226 ++++ src/physicalai/runtime/smoothers.py | 71 + 8 files changed, 3410 insertions(+) create mode 100644 docs/design/components/runtime-system/implementation_plan.md create mode 100644 examples/runtime/inference_async.py create mode 100644 examples/runtime/inference_pi05_gpu.ipynb create mode 100644 src/physicalai/runtime/__init__.py create mode 100644 src/physicalai/runtime/_action_queue.py create mode 100644 src/physicalai/runtime/execution.py create mode 100644 src/physicalai/runtime/runtime.py create mode 100644 src/physicalai/runtime/smoothers.py diff --git a/docs/design/components/runtime-system/implementation_plan.md b/docs/design/components/runtime-system/implementation_plan.md new file mode 100644 index 0000000..07b1473 --- /dev/null +++ b/docs/design/components/runtime-system/implementation_plan.md @@ -0,0 +1,1146 @@ +# Runtime System — Implementation Plan + +This document is the implementation plan for `physicalai.runtime`. It refines the original [policy_runtime_design.md](./policy_runtime_design.md) based on codebase exploration, bug analysis, and architecture review. + +Read [policy_runtime_design.md](./policy_runtime_design.md) first for API shape and ownership rules. This document covers what to build, in what order, and why. + +## Reference Implementation + +The golden reference is `physicalai/examples/runtime/inference_async.py` — a working async prototype with QueueMixer, InferenceThread, velocity clamping, and camera discovery. Every runtime component must match or exceed its behavior. + +--- + +## Phase 1: Critical Bug Fixes (half day) ✅ + +Fix bugs on the code path Phase 2 depends on. Phase 2 defines a public runtime contract — workarounds would calcify into permanent API shape. `predict_action_chunk()` currently raises `RuntimeError` without these fixes because Bug 2's inverted guard blocks the runtime's call to `model.predict_action_chunk(obs)`. This is not stylistic — it is a hard blocker. + +### Bug 1: `use_action_queue` checks manifest, ignores runtime runner + +**File**: `physicalai/src/physicalai/inference/model.py` — `use_action_queue` property + +**Problem**: Reads `self.manifest.model.runner` class_path. Ignores `self.runner` passed at construction or set at runtime. + +**Fix**: Check `isinstance(self.runner, ActionChunking)` instead: + +```python +@property +def use_action_queue(self) -> bool: + from physicalai.inference.runners.action_chunking import ActionChunking + return isinstance(self.runner, ActionChunking) +``` + +### Bug 2: `select_action()` / `predict_action_chunk()` guards inverted + +**File**: `physicalai/src/physicalai/inference/model.py` + +**Problem**: `select_action()` raises when `not use_action_queue`. `predict_action_chunk()` raises when `use_action_queue`. Both are backwards — `select_action()` should be the generic one-action API, `predict_action_chunk()` should return raw chunks. + +**Fix**: Remove guards entirely. Both methods should work for any runner (shape-stable contract per design doc §3): + +| Runner | `select_action()` | `predict_action_chunk()` | +| --------------- | ------------------ | ------------------------ | +| single-pass | runner output | wrap as `(1, D)` chunk | +| chunk-producing | pop one via cursor | runner output | + +### Bug 3: ACT export manifest declares SinglePass instead of ActionChunking + +**File**: `library/src/physicalai/export/mixin_policy.py` — `_build_manifest()` + +**Problem**: Checks `metadata.get("use_action_queue", False)` but ACT never sets this metadata flag. Manifest always gets `SinglePass` runner. + +**Fix**: ACT export must pass `use_action_queue=True` and `chunk_size=` in its metadata kwargs. Same fix needed for Pi0.5 (Bug 5). + +### Deferred Bugs (document as issues, do not block Phase 2) + +| Bug | Summary | Why deferred | +| ----- | --------------------------------------------------------------------- | ------------------------------------------------------- | +| Bug 4 | Manifest missing `hardware` section (`RobotSpec`, `CameraSpec`) | Not on inference code path | +| Bug 5 | Pi0.5 export also declares SinglePass | Same root cause as Bug 3, fix together | +| Bug 7 | Pi0.5 normalization not baked into graph (external pre/postprocessor) | By design — manifest `preprocessors_specs` handles this | +| Bug 8 | Pi0.5 denoising loop not exportable cleanly (11x graph size) | Export concern, not runtime concern | +| Bug 9 | `OVTokenizer` may need `import openvino_tokenizers` for custom ops | Needs verification — may already work via adapter | + +--- + +## Phase 2: Runtime System (2–3 days) ✅ + +New package: `physicalai/src/physicalai/runtime/` + +```text +physicalai/src/physicalai/runtime/ +├── __init__.py # exports: PolicyRuntime, SyncExecution, AsyncExecution, +│ # ActionQueue, LerpSmoother, ReplaceSmoother, RunStats +├── smoothers.py # ChunkSmoother ABC, ReplaceSmoother, LerpSmoother +├── _action_queue.py # ActionQueue (public via __init__, internal module) +├── execution.py # Execution ABC, SyncExecution, AsyncExecution, WorkerDiedError +└── runtime.py # PolicyRuntime, RunStats, default_observation_to_input +``` + +Dependency order: `smoothers.py` → `_action_queue.py` → `execution.py` → `runtime.py` → `__init__.py` + +### Architectural Decisions + +**ActionQueue is owned by PolicyRuntime, not hidden inside Execution.** + +The original design doc keeps Execution (scheduling) and ActionQueue (buffering) as separate concerns. This is correct — when `AsyncExecution(transport="process")` or `RemoteExecution` arrive, they should push chunks into the same ActionQueue without duplicating buffer logic. + +Users get a clean default API: + +```python +runtime = PolicyRuntime( + robot=robot, + model=model, + execution=AsyncExecution(threshold=0.5), + fps=30, +) +runtime.run(duration_s=60) +``` + +Power users can override buffering: + +```python +runtime = PolicyRuntime( + robot=robot, + model=model, + execution=AsyncExecution(threshold=0.5), + action_queue=ActionQueue(smoother=LerpSmoother(duration_frames=10)), + fps=30, +) +``` + +**Execution is a scheduler, not a buffer.** It decides when/where inference runs and pushes results into ActionQueue. It does not own pop, remaining, or chunk_size. + +**InferenceModel must NOT import ActionQueue.** Per design doc §4: if both layers need pop-from-chunk mechanics, they share `ActionChunkCursor`, not `ActionQueue`. + +### 2.1 `smoothers.py` + +Extracted from `QueueMixer.add()` in inference_async.py. + +```python +class ChunkSmoother(ABC): + """Merges a new action chunk into remaining actions from the previous chunk.""" + + @abstractmethod + def merge( + self, + remaining: np.ndarray, # (R, action_dim) — unconsumed actions from previous chunk + incoming: np.ndarray, # (H, action_dim) — new chunk from inference + offset: int, # skip first N actions of incoming (latency compensation) + ) -> np.ndarray: + """Return merged actions array. Called by ActionQueue.push_chunk().""" + ... + + +class ReplaceSmoother(ChunkSmoother): + """Drop remaining actions, use incoming[offset:].""" + + def merge(self, remaining, incoming, offset): + return incoming[offset:] + + +class LerpSmoother(ChunkSmoother): + """Lerp-blend overlapping region, then append non-overlapping tail. + + Stateless merge — no hidden mutation. duration_frames is the fallback + blending window used when offset is 0. When offset > 0, the blending + window is computed from offset directly: lerp_dur = max(offset, 1). + + Matches QueueMixer.add() from inference_async.py: + - Weights: w_i = max(1.0 - i / lerp_dur, 0.0) for old actions + - Overlap region: blended = w * remaining + (1 - w) * incoming + """ + + def __init__(self, duration_frames: int = 5) -> None: + self.duration_frames = duration_frames + + def merge(self, remaining, incoming, offset): + lerp_dur = max(offset, 1) if offset > 0 else self.duration_frames + + incoming = incoming[offset:] + n_remain = len(remaining) + lerp_dur = min(n_remain, lerp_dur) + + weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0) + weights = weights[:, np.newaxis] + + n_blend = min(n_remain, len(incoming)) + blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend] + + return np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32) +``` + +Key: `offset` is not just "skip N actions." It is latency compensation — `offset = int(inference_latency * fps)`. The smoother must handle this, not the caller. + +### 2.2 `_action_queue.py` (public API, internal module) + +Thread-safe action buffer with smoother integration and hold telemetry. + +```python +class ActionQueue: + """Thread-safe action buffer with chunk smoothing and starvation telemetry. + + Public API — exported from physicalai.runtime. Power users can override + the default ActionQueue on PolicyRuntime to customize smoothing behavior. + Execution pushes chunks into it; PolicyRuntime pops actions from it. + """ + + def __init__(self, smoother: ChunkSmoother | None = None) -> None: + self._smoother = smoother or ReplaceSmoother() + self._deque: deque[np.ndarray] = deque() + self._lock = threading.Lock() + self._consecutive_holds: int = 0 + self._total_holds: int = 0 + self._total_pops: int = 0 + + def push_chunk(self, chunk: np.ndarray, offset: int = 0) -> None: + """Merge a new chunk into the queue. Thread-safe.""" + with self._lock: + remaining = (np.stack(list(self._deque)) + if self._deque + else np.empty((0, chunk.shape[1]), dtype=chunk.dtype)) + merged = self._smoother.merge(remaining, chunk, offset) + self._deque.clear() + self._deque.extend(merged) + + def pop(self) -> np.ndarray | None: + """Pop next action, or None if empty. Thread-safe.""" + with self._lock: + if not self._deque: + self._consecutive_holds += 1 + self._total_holds += 1 + return None + self._consecutive_holds = 0 + self._total_pops += 1 + return self._deque.popleft() + + @property + def remaining(self) -> int: + with self._lock: + return len(self._deque) + + def below_threshold(self, threshold: int) -> bool: + return self.remaining < threshold + + def clear(self) -> None: + with self._lock: + self._queue = None + self._index = 0 + + # --- Telemetry --- + @property + def consecutive_holds(self) -> int: + return self._consecutive_holds + + @property + def total_holds(self) -> int: + return self._total_holds + + @property + def total_pops(self) -> int: + return self._total_pops +``` + +### 2.3 `execution.py` + +**Execution ABC** — scheduler only. Pushes chunks into ActionQueue, does not own pop/remaining. + +```python +class Execution(ABC): + """Decides when and where inference runs. Pushes results into ActionQueue.""" + + @abstractmethod + def start(self, model: InferenceModel, action_queue: ActionQueue) -> None: + """Bind to model and queue. Called once before the loop.""" + ... + + @abstractmethod + def maybe_request(self, observation: dict[str, np.ndarray]) -> None: + """Check if new inference is needed. If so, run or schedule it.""" + ... + + @abstractmethod + def warmup(self, sample_observation: dict[str, np.ndarray]) -> None: + """Run one inference to discover chunk_size and seed the queue. + + After warmup(): + - action_queue has one chunk ready (robot starts moving immediately) + - self.chunk_size is set + - self.action_dim is set + """ + ... + + @abstractmethod + def stop(self) -> None: + """Stop scheduling. For async: signal thread, join with timeout.""" + ... + + @property + @abstractmethod + def chunk_size(self) -> int: + """Discovered after warmup(). Used to compute threshold.""" + ... +``` + +**SyncExecution** — blocks on inference when queue runs low. + +```python +class SyncExecution(Execution): + """Synchronous inference in the control thread.""" + + def __init__(self) -> None: + self._model: InferenceModel | None = None + self._queue: ActionQueue | None = None + self._chunk_size: int = 0 + + def start(self, model, action_queue): + self._model = model + self._queue = action_queue + + def warmup(self, sample_observation): + actions = self._model.predict_action_chunk(sample_observation) + self._chunk_size = actions.shape[0] + self._queue.push_chunk(actions, offset=0) + + def maybe_request(self, observation): + if self._queue.below_threshold(1): # refill when empty + actions = self._model.predict_action_chunk(observation) + self._queue.push_chunk(actions, offset=0) + + def stop(self): + pass + + @property + def chunk_size(self): + return self._chunk_size +``` + +**AsyncExecution** — background thread with health monitoring. Maps to `InferenceThread` from inference_async.py. + +```python +class WorkerDiedError(RuntimeError): + """Raised when the inference worker thread dies unexpectedly.""" + pass + + +class AsyncExecution(Execution): + """Async inference in a background thread. + + Thread architecture (matches inference_async.py): + + Control thread (main): Inference thread (background): + ───────────────────── ──────────────────────────── + loop at fps: loop: + obs = robot.get_observation() wait for obs_slot + execution.maybe_request(obs) chunk = model.predict_action_chunk(obs) + action = queue.pop() offset = int(latency * fps) + robot.send_action(action) queue.push_chunk(chunk, offset) + """ + + def __init__( + self, + threshold: float = 0.5, + fps: int = 30, + watchdog_timeout_s: float = 30.0, + max_consecutive_holds: int | None = None, # default: 3 * fps (3 seconds) + ) -> None: + self._threshold_frac = threshold + self._fps = fps + self._watchdog_timeout_s = watchdog_timeout_s + self._max_consecutive_holds = max_consecutive_holds or 3 * fps + + # Set during start() + self._model: InferenceModel | None = None + self._queue: ActionQueue | None = None + self._chunk_size: int = 0 + self._threshold_count: int = 0 + + # Thread state + self._lock = threading.Lock() + self._obs_slot: dict | None = None + self._obs_ready = threading.Event() + self._running_inference = False + self._request_time: float = 0.0 + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + self._death_cause: BaseException | None = None + self._inference_count: int = 0 + + def start(self, model, action_queue): + self._model = model + self._queue = action_queue + self._thread = threading.Thread(target=self._run, name="InferenceThread", daemon=True) + self._thread.start() + + def warmup(self, sample_observation): + actions = self._model.predict_action_chunk(sample_observation) + self._chunk_size = actions.shape[0] + self._threshold_count = int(self._chunk_size * self._threshold_frac) + self._queue.push_chunk(actions, offset=0) + + def maybe_request(self, observation): + # Check for worker death — raise, don't silently continue + if self._thread is not None and not self._thread.is_alive(): + if self._death_cause is not None: + raise WorkerDiedError( + f"Inference thread died: {self._death_cause}" + ) from self._death_cause + + # Check for stuck inference + if self._busy_duration > self._watchdog_timeout_s: + logger.warning( + "Inference stuck for %.0fs — force resetting", self._busy_duration, + ) + self._force_reset() + + # Submit if queue is low and worker is idle + if self._queue.below_threshold(self._threshold_count): + if not self._busy: + # Defensive copy — observation may be reused by caller + snapshot = { + k: v.copy() if isinstance(v, np.ndarray) else v + for k, v in observation.items() + } + with self._lock: + self._obs_slot = snapshot + self._request_time = time.perf_counter() + self._obs_ready.set() + + def stop(self): + if self._thread is not None: + self._stop_event.set() + self._obs_ready.set() # unblock wait + self._thread.join(timeout=10.0) + + @property + def chunk_size(self): + return self._chunk_size + + # --- Health properties --- + + @property + def alive(self) -> bool: + return self._thread is not None and self._thread.is_alive() + + @property + def _busy(self) -> bool: + with self._lock: + return self._obs_slot is not None or self._running_inference + + @property + def _busy_duration(self) -> float: + with self._lock: + if not (self._obs_slot is not None or self._running_inference): + return 0.0 + return time.perf_counter() - self._request_time + + @property + def inference_count(self) -> int: + return self._inference_count + + # --- Internal --- + + def _force_reset(self) -> None: + with self._lock: + self._obs_slot = None + self._running_inference = False + logger.warning("Force reset — cleared stuck inference state") + + def _run(self) -> None: + """Inference thread main loop.""" + try: + while not self._stop_event.is_set(): + self._obs_ready.wait() + self._obs_ready.clear() + + if self._stop_event.is_set(): + return + + with self._lock: + obs = self._obs_slot + self._obs_slot = None + if obs is None: + continue + self._running_inference = True + + t0 = time.perf_counter() + actions = self._model.predict_action_chunk(obs) + latency = time.perf_counter() - t0 + + offset = int(latency * self._fps) + + self._queue.push_chunk(actions, offset=offset) + self._inference_count += 1 + + with self._lock: + self._running_inference = False + + except Exception as e: + self._death_cause = e + logger.error("Inference thread died: %s", e, exc_info=True) +``` + +### 2.4 `runtime.py` + +```python +from __future__ import annotations + +import logging +import time +from collections.abc import Callable, Mapping, Sequence +from typing import Any, Protocol + +import numpy as np + +from physicalai.capture.camera import Camera, Frame +from physicalai.inference import InferenceModel +from physicalai.robot.interface import Robot, RobotObservation +from physicalai.runtime._action_queue import ActionQueue +from physicalai.runtime.execution import Execution, WorkerDiedError +from physicalai.runtime.smoothers import LerpSmoother + +logger = logging.getLogger(__name__) + +def default_observation_to_input( + robot_obs: RobotObservation, + camera_frames: dict[str, Frame], +) -> dict[str, Any]: + """Default observation-to-model-input conversion. + + Maps: + - Joint positions → "state" array + - Camera frames → "images.{name}" arrays + + For Pi0.5 or other models needing custom keys (e.g. "task"), + pass a custom obs_to_input callable to PolicyRuntime. + """ + model_input: dict[str, Any] = {} + + # Collect joint positions into "state" vector + if robot_obs.joint_positions: + model_input["state"] = np.array([robot_obs.joint_positions], dtype=np.float32) + + # Map camera frames to "images.{name}" + for name, frame in camera_frames.items(): + model_input[f"images.{name}"] = frame.data + + return model_input + + +class RuntimeCallback(Protocol): + """Optional hook points in the PolicyRuntime control loop.""" + + def before_send_action(self, *, action: np.ndarray, step: int) -> np.ndarray | None: + """Called before sending action. Return modified action or None to use original.""" + ... + + def on_action_sent(self, *, action: np.ndarray, step: int) -> None: + """Called after action is sent to robot.""" + ... + + def on_hold(self, *, step: int, holds: int) -> None: + """Called when action queue is empty and robot holds last position.""" + ... + + +class PolicyRuntime: + """Runs a policy on robot hardware. + + Loop shape (matches inference_async.py): + obs = robot.get_observation() + model_input = obs_to_input(obs, cameras) + execution.maybe_request(model_input) + action = action_queue.pop() + if action is None: hold position + robot.send_action(action) + sleep_until_next_tick() + """ + + def __init__( + self, + robot: Robot, + model: InferenceModel, + execution: Execution, + fps: float, + cameras: Mapping[str, Camera] | None = None, + action_queue: ActionQueue | None = None, + obs_to_input: Callable[[RobotObservation, dict[str, Frame]], dict[str, Any]] | None = None, + callbacks: Sequence[RuntimeCallback] = (), + ) -> None: + self._robot = robot + self._model = model + self._execution = execution + self._fps = fps + self._cameras = cameras or {} + self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=5)) + self._obs_to_input = obs_to_input or default_observation_to_input + self._callbacks = list(callbacks) + + def run(self, *, duration_s: float | None = None) -> RunStats: + """Run the control loop. + + 1. Warm up — run one inference, seed queue, discover chunk_size + 2. Loop — observe, maybe_request, pop, send, sleep + 3. Shutdown — stop execution, drain + """ + # --- Init --- + self._execution.start(self._model, self._action_queue) + + sample_obs = self._build_model_input() + self._execution.warmup(sample_obs) + + goal_time = 1.0 / self._fps + step = 0 + last_action: np.ndarray | None = None + + try: + while True: + if duration_s is not None and step * goal_time >= duration_s: + break + + loop_start = time.perf_counter() + + # 1. Observe + obs = self._build_model_input() + + # 2. Maybe request inference + self._execution.maybe_request(obs) + + # 3. Pop action + action = self._action_queue.pop() + if action is not None: + last_action = action + else: + action = last_action + holds = self._action_queue.consecutive_holds + if holds == 1: + logger.warning("Queue empty — holding position") + elif holds % self._fps == 0: + logger.warning( + "Queue starvation: %d consecutive holds (%.1fs)", + holds, holds / self._fps, + ) + self._invoke_callback("on_hold", step=step, holds=holds) + + if action is None: + # No warmup result and no previous action — skip + logger.error("No action available (warmup may have failed)") + continue + + # 4. Callbacks + action = self._invoke_callback("before_send_action", action=action, step=step) or action + + # 5. Send + self._robot.send_action(action) + self._invoke_callback("on_action_sent", action=action, step=step) + + # 6. Timing + elapsed = time.perf_counter() - loop_start + sleep_time = goal_time - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + step += 1 + + except KeyboardInterrupt: + logger.info("Interrupted by user") + except WorkerDiedError as e: + logger.error("Worker died during runtime: %s", e) + raise + finally: + self._shutdown(step) + + return RunStats( + steps=step, + total_pops=self._action_queue.total_pops, + total_holds=self._action_queue.total_holds, + inference_count=getattr(self._execution, "inference_count", 0), + ) + + def _build_model_input(self) -> dict[str, Any]: + robot_obs = self._robot.get_observation() + camera_frames = {name: cam.read_latest() for name, cam in self._cameras.items()} + return self._obs_to_input(robot_obs, camera_frames) + + def _shutdown(self, step: int) -> None: + """Robot and cameras must be connected before run(). Caller owns lifecycle.""" + # 1. Stop inference scheduling + self._execution.stop() + + # 2. Drain remaining actions (up to 1s) for smooth stop + remaining = self._action_queue.remaining + drain_limit = min(remaining, int(self._fps)) + for _ in range(drain_limit): + action = self._action_queue.pop() + if action is not None: + self._robot.send_action(action) + time.sleep(1.0 / self._fps) + + logger.info( + "Shutdown complete — %d steps, %d pops, %d holds", + step, self._action_queue.total_pops, self._action_queue.total_holds, + ) + + def _invoke_callback(self, method: str, **kwargs): + result = None + for cb in self._callbacks: + fn = getattr(cb, method, None) + if fn is not None: + result = fn(**kwargs) + return result +``` + +### 2.5 Tests + +```text +physicalai/tests/unit/runtime/ +├── test_smoothers.py # ReplaceSmoother, LerpSmoother: offset handling, lerp weights, +│ # dynamic duration, edge cases (empty remaining, offset > chunk) +├── test_action_queue.py # push/pop, smoother integration, thread safety (concurrent +│ # push+pop from 2 threads), hold counters, clear() +├── test_execution.py # SyncExecution: warmup seeds queue, maybe_request refills +│ # AsyncExecution: mock model, health monitoring (alive/busy/ +│ # busy_duration), WorkerDiedError propagation, force_reset +└── test_runtime.py # PolicyRuntime with mock robot + mock model: + # full loop, hold fallback, shutdown drain, callbacks, + # WorkerDiedError propagation, duration_s limit +``` + +All tests use mock `InferenceModel` and mock `Robot` — no hardware, no exported models. + +--- + +## Phase 3: CLI and Integration (1–2 days) + +1. `physicalai run --config so101_pi05.yaml` CLI command +2. YAML config loader (`PolicyRuntime.from_config()`) +3. Observation builder (bridges `Robot` protocol + `Camera` → model input dict) +4. Migrate `inference_async.py` to use `PolicyRuntime` (becomes ~20 lines) + +### Velocity clamping and camera discovery stay outside core + +Velocity clamping (`max_speed`, `ramp_steps`, `commanded_pos` tracking) is SO-101-specific. Camera discovery (interactive selection, name mapping, blank cameras, flip) is app-specific. + +These belong in: + +- Example scripts (`examples/runtime/`) +- CLI helpers (`physicalai.cli.run`) +- User callbacks + +Not in `PolicyRuntime` or `Execution`. If a reusable pattern emerges across 2+ robots, promote to a formal action-transform layer later. + +--- + +## Phase 3.5: Runtime Telemetry (1–2 days) + +Streaming observability for the runtime control loop. The runtime process must not be bottlenecked by telemetry — all emission is fire-and-forget over zenoh pub-sub. A separate observer process handles visualization, aggregation, and persistence. + +### Constraint + +Two-week release deadline. Scope: emitter in runtime, observer CLI, zenoh-only transport. No OpenTelemetry, no Studio UI integration, no remote telemetry. Use OTel-compatible metric naming so a future OTLP bridge is trivial. + +### Architecture + +```text +┌──────────────────────────────────────────┐ +│ Runtime Process (real-time budget) │ +│ │ +│ PolicyRuntime.run() │ +│ └─ TelemetryEmitter │ +│ zenoh.put() per tick/inference │ +│ no-op if zenoh not installed │ +└──────────┬───────────────────────────────┘ + │ zenoh pub-sub (local SHM) + ▼ +┌──────────────────────────────────────────┐ +│ Observer Process (separate, optional) │ +│ │ +│ TelemetrySubscriber │ +│ ├─ Live console dashboard │ +│ ├─ JSONL file sink (--record) │ +│ └─ Future: WebSocket → Studio UI │ +└──────────────────────────────────────────┘ +``` + +### Topic Schema + +All topics prefixed with `physicalai/rt/{session_id}/`. Session ID is a short hex string generated at `run()` start. + +| Topic | Frequency | Payload (msgpack) | Purpose | +| ----------- | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | +| `tick` | Every control loop tick (30 Hz) | `step`, `timestamp`, `joint_positions: (D,)`, `action_sent: (D,)`, `queue_remaining: int`, `loop_duration_s`, `sleep_time_s` | Core loop health + executed trajectory | +| `inference` | Per inference completion (~2–3 Hz) | `latency_s`, `offset`, `chunk: (H, D)` | Latency monitoring + predicted trajectory | +| `lifecycle` | On start / warmup / shutdown / error | `event: str`, `metadata: dict` | Session boundaries | + +**Serialization**: msgpack with numpy arrays encoded as `{"__np__": true, "dtype": str, "shape": list, "data": bytes}`. Encode cost is ~50 μs per tick event, ~100 μs per inference event. Negligible in a 33 ms tick budget. + +**Naming convention**: Scalar fields use OTel-compatible names (`physicalai.runtime.loop_duration_s`, `physicalai.runtime.inference_latency_s`) so a future OTLP exporter in the observer can re-export without renaming. + +### New Files + +```text +physicalai/src/physicalai/runtime/ +├── _telemetry.py # TelemetryEmitter (zenoh publisher, no-op fallback) +└── observer/ + ├── __init__.py + ├── __main__.py # python -m physicalai.runtime.observer + ├── _subscriber.py # TelemetrySubscriber (zenoh sub + dispatch) + ├── _console.py # Live console dashboard + └── _recorder.py # JSONL file sink for offline replay +``` + +### Dependency + +```toml +# pyproject.toml +[project.optional-dependencies] +telemetry = ["zenoh>=1.0", "msgpack>=1.0"] +``` + +zenoh is optional. `TelemetryEmitter` gracefully degrades to no-op when zenoh is not installed. Observer process requires `physicalai[telemetry]`. + +### ActionQueue Change + +Add `peek_remaining()` — returns a copy of queued actions as `(R, D)` array without consuming them. Called once per inference event (not per tick) to snapshot the post-smooth future trajectory. + +```python +def peek_remaining(self) -> np.ndarray | None: + """Return copy of remaining actions without consuming them. Thread-safe.""" + with self._lock: + if not self._deque: + return None + return np.stack(list(self._deque)) +``` + +### Integration Points + +**PolicyRuntime** (4 touch points in `runtime.py`): + +1. Accept optional `telemetry: TelemetryEmitter | None` in `__init__` +2. `emit_lifecycle("start")` at top of `run()` +3. `emit_tick(...)` at end of each tick +4. `emit_lifecycle("shutdown")` in `_shutdown()` + +**AsyncExecution** (1 touch point in `execution.py`): + +1. Accept optional `telemetry: TelemetryEmitter | None` in `__init__` +2. `emit_inference(...)` after `push_chunk()` in the `_run()` thread + +No changes to `smoothers.py` or `__init__.py` exports. `TelemetryEmitter` is internal wiring, not user-facing API. + +### Out of Scope + +- Remote telemetry (zenoh is network-transparent; enabling it is config, not code) +- Camera frame streaming (stays on iceoryx2) +- OpenTelemetry / OTLP exporter (future observer plugin) +- Studio UI WebSocket bridge +- Model input snapshot recording (expensive, needs opt-in design) +- Smoothing delta visualization (derivable in observer from `inference.chunk` vs `tick.action_sent`) + +--- + +## Phase 3.6: Fault Tolerance (1 day) + +Robot connections over USB/serial and camera feeds are fragile. USB hubs lose power, serial timeouts occur, cameras drop frames. The runtime must not crash on transient hardware errors — it must retry and recover, or degrade gracefully. + +### Current Problem + +`PolicyRuntime._build_model_input()` calls `robot.get_observation()` and `cam.read_latest()` with no error handling. `robot.send_action()` is also unprotected. Any `ConnectionError`, `OSError`, or `serial.SerialException` from the SO-101 serial bus kills the control loop. + +### Design Principles + +1. **Never crash the loop on a transient error.** Retry the operation. If retries are exhausted, hold position and log — do not raise. +2. **Distinguish transient vs fatal.** USB disconnect that resolves after replug = transient. `ValueError` from wrong action shape = fatal (programmer error). `KeyboardInterrupt` = always propagate. +3. **No silent degradation.** Every recovery must log a warning and emit a telemetry lifecycle event. The operator must know the robot hiccupped. +4. **Bound retry duration.** Retries must not exceed the tick budget. If `get_observation()` fails 3 times within the tick, use the last known observation and move on. +5. **Camera failure ≠ robot failure.** A dropped camera frame should use the last known frame (stale but safe). A robot communication failure is more serious but still retryable. + +### Implementation: `_resilient_observe()` and `_resilient_send()` + +Replace the raw calls in the control loop with retry-wrapped variants: + +```python +_MAX_OBS_RETRIES = 3 +_MAX_SEND_RETRIES = 2 +_RETRY_BACKOFF_S = 0.001 # 1ms between retries — bounded, won't bust 33ms budget + +def _resilient_observe(self) -> dict[str, Any]: + """Build model input with retry on transient hardware errors.""" + # Robot observation + robot_obs = None + for attempt in range(_MAX_OBS_RETRIES): + try: + robot_obs = self._robot.get_observation() + self._consecutive_error_ticks = 0 + break + except (ConnectionError, OSError) as e: + logger.warning("Robot observation failed (attempt %d/%d): %s", + attempt + 1, _MAX_OBS_RETRIES, e) + if self._telemetry: + self._telemetry.emit_lifecycle("obs_error", error=str(e), + attempt=attempt + 1) + if attempt < _MAX_OBS_RETRIES - 1: + time.sleep(_RETRY_BACKOFF_S) + + if robot_obs is None: + self._consecutive_error_ticks += 1 + if self._consecutive_error_ticks >= self._max_consecutive_error_ticks: + msg = (f"Robot unreachable for {self._consecutive_error_ticks} consecutive ticks " + f"({self._consecutive_error_ticks / self._fps:.1f}s)") + if self._telemetry: + self._telemetry.emit_lifecycle("connection_lost", error=msg) + raise ConnectionError(msg) + if self._last_robot_obs is not None: + logger.warning("Using stale robot observation (error tick %d/%d)", + self._consecutive_error_ticks, + self._max_consecutive_error_ticks) + robot_obs = self._last_robot_obs + self._stale_obs_ticks += 1 + else: + raise ConnectionError("Robot observation failed and no fallback available") + self._last_robot_obs = robot_obs + + # Camera frames — independent per camera, each can fail independently + camera_frames: dict[str, Frame] = {} + for name, cam in self._cameras.items(): + try: + camera_frames[name] = cam.read_latest() + self._last_camera_frames[name] = camera_frames[name] + except (ConnectionError, OSError) as e: + logger.warning("Camera '%s' read failed: %s — using last frame", name, e) + if name in self._last_camera_frames: + camera_frames[name] = self._last_camera_frames[name] + self._stale_obs_ticks += 1 + + return self._obs_to_input(robot_obs, camera_frames) + + +def _resilient_send(self, action: np.ndarray) -> None: + """Send action with retry on transient errors.""" + for attempt in range(_MAX_SEND_RETRIES): + try: + self._robot.send_action(action) + self._consecutive_error_ticks = 0 + return + except (ConnectionError, OSError) as e: + logger.warning("send_action failed (attempt %d/%d): %s", + attempt + 1, _MAX_SEND_RETRIES, e) + if self._telemetry: + self._telemetry.emit_lifecycle("send_error", error=str(e), + attempt=attempt + 1) + if attempt < _MAX_SEND_RETRIES - 1: + time.sleep(_RETRY_BACKOFF_S) + self._consecutive_error_ticks += 1 + self._transient_errors += 1 + logger.error("send_action failed after %d attempts — skipping tick", + _MAX_SEND_RETRIES) +``` + +### Error Classification + +| Exception type | Source | Treatment | +| ---------------------------- | --------------------------------------------------------- | --------------------------------- | +| `ConnectionError` | SO-101 serial port closed, servo not responding | Retry, then stale fallback | +| `OSError` | USB disconnect, file descriptor error, camera device lost | Retry, then stale fallback | +| `TimeoutError` | Serial read timeout (subclass of `OSError`) | Retry, then stale fallback | +| `ValueError`, `RuntimeError` | Wrong action shape, uncalibrated mode, programming error | **Fatal** — propagate immediately | +| `WorkerDiedError` | Inference thread crash | **Fatal** — propagate immediately | +| `KeyboardInterrupt` | User Ctrl+C | **Always propagate** | + +### State Tracking + +`PolicyRuntime` gains fields for stale-observation fallback and error escalation: + +```python +self._last_robot_obs: RobotObservation | None = None +self._last_camera_frames: dict[str, Frame] = {} +self._consecutive_error_ticks: int = 0 +self._max_consecutive_error_ticks: int = int(3 * fps) # ~3 seconds +self._stale_obs_ticks: int = 0 +self._transient_errors: int = 0 +``` + +`_consecutive_error_ticks` is reset to 0 on any successful `get_observation()` or `send_action()`. It increments when all retries within a tick fail. When it reaches `max_consecutive_error_ticks`, the runtime raises `ConnectionError`. + +`RunStats` gains fault tolerance metrics: + +```python +@dataclass(frozen=True) +class RunStats: + steps: int + total_pops: int + total_holds: int + inference_count: int + transient_errors: int # total retried hardware errors + stale_obs_ticks: int # ticks where stale observation was used +``` + +The telemetry `tick` event gains a `stale_obs: bool` field so the observer can flag degraded ticks. + +### Warmup Resilience + +Warmup is the most fragile moment — USB may not be fully enumerated, servos may still be initializing. The first call to `_build_model_input()` has no stale fallback. + +Warmup gets its own retry loop with longer timeout: + +```python +_WARMUP_RETRIES = 5 +_WARMUP_BACKOFF_S = 1.0 # 1 second between warmup retries + +def _warmup_with_retry(self) -> None: + for attempt in range(_WARMUP_RETRIES): + try: + sample_obs = self._build_model_input() + self._execution.warmup(sample_obs) + return + except (ConnectionError, OSError) as e: + logger.warning("Warmup failed (attempt %d/%d): %s", + attempt + 1, _WARMUP_RETRIES, e) + if attempt < _WARMUP_RETRIES - 1: + time.sleep(_WARMUP_BACKOFF_S) + msg = f"Warmup failed after {_WARMUP_RETRIES} attempts — robot or cameras unreachable" + raise ConnectionError(msg) +``` + +Total warmup retry budget: 5 seconds. Long enough for USB enumeration, short enough to not confuse the user. + +### Reconnection + +Retry within a tick handles brief glitches (serial timeout, dropped USB packet). For sustained disconnects (USB cable pulled), the robot's `is_connected()` returns `False` and consecutive retries will keep failing. + +Full reconnection (calling `robot.disconnect()` then `robot.connect()`) is **not** done automatically in the runtime. Reconnecting a serial bus mid-loop is dangerous — it resets servo state, torque settings, and calibration. This must be an explicit user decision. + +Instead, the runtime: + +1. Logs an error with reconnection instructions +2. Continues holding position (stale obs + last action) +3. Emits `lifecycle("connection_lost")` telemetry event +4. After `max_consecutive_errors` (default: `3 * fps` = 3 seconds of failures), raises `ConnectionError` with a clear message + +This gives the user time to replug USB without the loop crashing, but doesn't silently run for minutes with a dead robot. + +### Camera Recovery + +Cameras are more resilient than robots — `SharedCamera` (iceoryx2) handles publisher death and re-spawn. Direct camera backends (`RealsenseCamera`, `UVCCamera`) may need explicit reconnection. + +The runtime treats camera failure as soft: use last frame, log warning, continue. If a camera has never produced a frame (failure on first read), omit it from the model input dict. The model's behavior with missing camera keys is model-specific — not the runtime's concern. + +### Tests + +```text +physicalai/tests/unit/runtime/ +└── test_fault_tolerance.py # Mock robot that raises on get_observation/send_action: + # - transient error → retry succeeds + # - sustained error → stale fallback + # - fatal error → propagates + # - camera failure → stale frame used + # - max_consecutive_errors → ConnectionError raised +``` + +--- + +## Phase 4: Advanced (later) + +1. `AsyncExecution(transport="process")` for PyTorch CPU (GIL contention) +2. RTC guidance correction (requires split export from `library/`) +3. `RemoteExecution` + gRPC `PolicyServer` (see [policy_server_design.md](./policy_server_design.md)) + +--- + +## Component Mapping: inference_async.py → Library + +| Script component | Library target | Notes | +| ------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------- | +| `QueueMixer` | `ActionQueue` + `LerpSmoother` | Offset-aware blending extracted into smoother | +| `QueueMixer.lerp_duration = max(offset, 1)` | `LerpSmoother.merge()` computes from offset | Stateless — `duration_frames` is fallback when offset=0 | +| `InferenceThread` | `AsyncExecution` | Same thread architecture: obs_slot + result push | +| `InferenceThread.force_reset()` | `AsyncExecution._force_reset()` | Clears stuck state | +| `InferenceThread.busy_duration` | `AsyncExecution._busy_duration` | Watchdog timeout trigger | +| `InferenceThread.alive` | `AsyncExecution.alive` | Dead thread detection | +| `get_full_observation()` | `default_observation_to_input()` + `obs_to_input` callable | Separates robot obs format from model input format | +| `action_to_robot_dict()` | `Robot.send_action(ndarray)` | Robot protocol handles conversion | +| `main()` while-loop | `PolicyRuntime.run()` | Same 5-step structure | +| Velocity clamping + ramp | User callback or example code | Too robot-specific for core runtime | +| Camera discovery + `SharedCamera` | User code / CLI (Phase 3) | App-specific | +| Warm-up inference + queue seeding | `Execution.warmup()` | Seeds queue so loop starts with actions | +| `inference_thread.alive` check + restart | `AsyncExecution.maybe_request()` raises `WorkerDiedError` | Raise instead of silent restart | +| `force_reset()` for stuck thread | `AsyncExecution._force_reset()` via watchdog | Auto-triggered when `busy_duration > timeout` | +| Hold position + `hold_count` | `ActionQueue.consecutive_holds` + `PolicyRuntime` logging | Telemetry via queue counters | +| Two-backend support (torch/OV) | `InferenceModel` abstraction | Runtime only calls `predict_action_chunk()` | +| (none — ad-hoc print/log) | `TelemetryEmitter` + zenoh pub-sub | Fire-and-forget, no-op when zenoh absent (Phase 3.5) | +| (none — loop crashes on USB error) | `_resilient_observe()` / `_resilient_send()` | Retry + stale fallback for transient errors (Phase 3.6) | + +--- + +## Design Gaps Addressed + +Gaps identified during architecture review, with resolutions. + +### Gap 1: Observation ownership in async + +**Problem**: Main thread passes observation dict to `maybe_request()`. If main thread reuses camera buffers, inference thread reads corrupted data. + +**Resolution**: `AsyncExecution.maybe_request()` performs defensive copy before submitting: + +```python +snapshot = { + k: v.copy() if isinstance(v, np.ndarray) else v + for k, v in observation.items() +} +``` + +Cost: one dict of numpy copies per inference request (not per tick — only when threshold triggers). At 30fps with threshold=0.5 and chunk_size=50, that's roughly once every ~0.8s. + +### Gap 2: Empty-queue telemetry + +**Problem**: "Hold last action" silently masks queue starvation. + +**Resolution**: `ActionQueue` tracks `consecutive_holds` and `total_holds`. `PolicyRuntime` logs warnings on first hold and every `fps` consecutive holds (1 per second). `on_hold` callback exposes starvation events to user code. + +### Gap 3: Graceful shutdown + +**Problem**: Hard stop can jerk the robot. + +**Resolution**: `PolicyRuntime._shutdown()` drains up to 1 second of remaining actions at loop FPS. Beyond 1s, hard stop — the user pressed Ctrl+C for a reason. Robot and cameras stay connected here; caller owns connect/disconnect lifecycle. + +### Gap 4: Error propagation + +**Problem**: Inference thread exceptions silently swallowed — `pop_action()` returns None forever. + +**Resolution**: `AsyncExecution` stores `_death_cause` on thread exception. `maybe_request()` checks `alive` and raises `WorkerDiedError` with original traceback preserved via `raise ... from`. PolicyRuntime catches and re-raises. + +### Gap 5: Two-backend support + +**Problem**: inference_async.py handles PyTorch and OpenVINO with different preprocessing. + +**Resolution**: Not a runtime concern. `InferenceModel` abstracts backend differences. Runtime only calls `model.predict_action_chunk(obs)`. The torch bypass in inference_async.py exists because the script bypasses `InferenceModel` for direct Pi05 policy access — the library runtime won't need this. + +--- + +## Resolved Questions + +1. **`PolicyRuntime.run()` returns `RunStats`.** `@dataclass` with 4 core fields (steps, total_pops, total_holds, inference_count) plus 2 fault tolerance fields (transient_errors, stale_obs_ticks) added in Phase 3.6. Useful for testing and logging. + +2. **Warm-up happens inside `run()`.** User doesn't forget, no "did I call warmup?" failure mode. + +3. **Dead worker raises `WorkerDiedError`.** Let caller decide recovery strategy. Auto-restart can mask systematic failures. + +--- + +## Relationship to Existing Design Docs + +This plan refines [policy_runtime_design.md](./policy_runtime_design.md). Key differences: + +| Topic | Original design doc | This plan | +| ------------------------------- | ---------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| ActionQueue visibility | Public parameter on PolicyRuntime | Public parameter with sensible default. Internal `_action_queue.py` module, exported via `__init__`. | +| Execution ABC | `start(action_queue, model)`, `maybe_request(obs, action_queue)` | `start(model, action_queue)`, `maybe_request(obs)` — queue stored internally on start, not passed per call | +| Warmup | `warmup(sample_observation, n=2)` | `warmup(sample_observation)` — one call, seeds queue | +| Health monitoring | Not specified | First-class: alive, busy, busy_duration, WorkerDiedError, watchdog | +| Smoothing | `LerpChunkSmoother`, `ReplaceMerger` | `LerpSmoother`, `ReplaceSmoother` — stateless merge, offset-aware | +| Observation bridge | Not specified | `obs_to_input` callable with `default_observation_to_input` fallback | +| `predict_action_chunk()` return | `Mapping[str, Any]` with `"actions"` key | `np.ndarray` directly (matches actual implementation) | +| Bug fixes | Not applicable | Phase 1 prerequisite — bugs 1-3 on critical path | +| Telemetry | Not specified | Phase 3.5: zenoh pub-sub emitter (fire-and-forget), separate observer process, optional dep | +| Fault tolerance | Not specified | Phase 3.6: retry + stale fallback for transient HW errors, error classification, bounded retry | + +All ownership rules, boundary constraints, and deferred-until-needed decisions from the original design doc remain in effect. diff --git a/examples/runtime/inference_async.py b/examples/runtime/inference_async.py new file mode 100644 index 0000000..9eade0a --- /dev/null +++ b/examples/runtime/inference_async.py @@ -0,0 +1,764 @@ +# pyrefly: ignore-errors +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Async threaded inference for SO-101 with lerp-blended action chunking. + +Supports two backends: +- **PyTorch/CUDA**: Pi05 checkpoint (.ckpt file) — GPU inference via select_action. +- **OpenVINO**: Exported model directory — CPU/GPU inference via InferenceModel. + +The backend is auto-detected from the checkpoint path: +- File ending in .ckpt → PyTorch Pi05 +- Directory → OpenVINO InferenceModel + +The main thread runs the robot control loop at a fixed FPS. +A background thread runs model inference and pushes action chunks +into a shared queue with lerp blending for smooth transitions. + +Uses LeRobot SO101Follower (degrees) for robot control and +physicalai SharedCamera (iceoryx2) for camera input. + +Usage (PyTorch): + python inference_async_pi05.py \ + --model-path pi05/pi05_eugene.ckpt \ + --device cuda \ + --robot-port /dev/ttyACM1 \ + --task "pick a can and place it in a bowl" + +Usage (OpenVINO): + python inference_async_pi05.py \ + --model-path exports/pi05_ov \ + --device CPU \ + --robot-port /dev/ttyACM1 \ + --camera-map overhead=top arm=wrist \ + --task "pick a can and place it in a bowl" +""" + +from __future__ import annotations + +import argparse +import logging +import threading +import time +from pathlib import Path + +import cv2 +import numpy as np + +from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig +from physicalai.capture import SharedCamera +from physicalai.capture.discovery import DeviceInfo, discover_all + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s [%(threadName)s] %(name)s: %(message)s", +) +logger = logging.getLogger(__name__) + +JOINT_NAMES = [ + "shoulder_pan", + "shoulder_lift", + "elbow_flex", + "wrist_flex", + "wrist_roll", + "gripper", +] + +IMAGE_WIDTH = 640 +IMAGE_HEIGHT = 480 +CAMERA_FPS = 30 + + +# --------------------------------------------------------------------------- +# Queue Mixer — lerp-blends old and new action chunks +# --------------------------------------------------------------------------- + +class QueueMixer: + """Action queue with linear-interpolation blending between chunks. + + When a new chunk arrives while old actions remain, the overlapping + region is lerp-blended so the robot doesn't jerk. An *offset* + parameter lets us skip the first N actions of the new chunk to + compensate for inference latency. + """ + + def __init__(self, lerp_duration: int = 5) -> None: + self.queue: np.ndarray | None = None + self.lerp_duration = lerp_duration + self.index = 0 + + def add(self, chunk: np.ndarray, offset: int = 0) -> None: + """Merge *chunk* (H, action_dim) into the queue.""" + if self.queue is None or self.index >= len(self.queue): + self.queue = chunk[offset:] + self.index = 0 + logger.info( + "[QueueMixer] First/exhausted → replaced queue (len=%d, offset=%d)", + len(self.queue), offset, + ) + return + + remaining = self.queue[self.index:] + incoming = chunk[offset:] + n_remain = len(remaining) + lerp_dur = min(n_remain, self.lerp_duration) + + weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0) + weights = weights[:, np.newaxis] + + n_blend = min(n_remain, len(incoming)) + blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend] + + self.queue = np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32) + self.index = 0 + logger.info( + "[QueueMixer] Blended chunk (blended=%d, appended=%d, total=%d, lerp=%d, offset=%d)", + n_blend, max(len(incoming) - n_blend, 0), len(self.queue), lerp_dur, offset, + ) + + def pop(self) -> np.ndarray | None: + """Pop the next action, or return None if empty.""" + if self.queue is None or self.index >= len(self.queue): + return None + action = self.queue[self.index] + self.index += 1 + return action + + @property + def remaining(self) -> int: + if self.queue is None: + return 0 + return max(len(self.queue) - self.index, 0) + + @property + def empty(self) -> bool: + return self.remaining == 0 + + +# --------------------------------------------------------------------------- +# Inference Thread (PyTorch / CUDA) +# --------------------------------------------------------------------------- + +class InferenceThread: + """Background thread that runs model inference on demand. + + Supports two backends: + - "torch": Pi05 GPU inference via select_action + action_queue draining. + - "openvino": InferenceModel inference via model(dict)["action"]. + + The control loop writes an observation dict into `obs_slot` and sets + `obs_ready`. The inference thread picks it up, runs inference, and + pushes the result (chunk + timing) into `result_slot`. + """ + + def __init__( + self, + model: object, + backend: str, + device: str, + task: str, + flip_cameras: set[str], + camera_name_map: dict[str, str] | None = None, + blank_cameras: list[str] | None = None, + ) -> None: + self.model = model + self.backend = backend + self.device = device + self.task = task + self.flip_cameras = flip_cameras + self.camera_name_map = camera_name_map or {} + self.blank_cameras = blank_cameras or [] + + self._lock = threading.Lock() + self._obs_slot: dict | None = None + self._obs_ready = threading.Event() + self._running_inference = False + self._request_time = 0.0 + + self._result_lock = threading.Lock() + self._result_slot: tuple[np.ndarray, float] | None = None + + self._stop = threading.Event() + self._thread = threading.Thread(target=self._run, name="InferenceThread", daemon=True) + self.inference_count = 0 + + def start(self) -> None: + logger.info("[InferenceThread] Starting background thread") + self._thread.start() + + def stop(self) -> None: + logger.info("[InferenceThread] Requesting stop") + self._stop.set() + self._obs_ready.set() + self._thread.join(timeout=10.0) + logger.info("[InferenceThread] Stopped (ran %d inferences)", self.inference_count) + + def request(self, observation: dict) -> bool: + """Submit an observation dict for inference. Returns False if busy.""" + with self._lock: + if self._obs_slot is not None or self._running_inference: + return False + self._obs_slot = observation + self._request_time = time.perf_counter() + self._obs_ready.set() + logger.debug("[InferenceThread] Observation submitted") + return True + + @property + def busy(self) -> bool: + with self._lock: + return self._obs_slot is not None or self._running_inference + + @property + def alive(self) -> bool: + return self._thread.is_alive() + + @property + def busy_duration(self) -> float: + """Seconds since last request was submitted. 0 if not busy.""" + with self._lock: + if not (self._obs_slot is not None or self._running_inference): + return 0.0 + return time.perf_counter() - self._request_time + + def force_reset(self) -> None: + """Clear stuck state so new requests can be submitted.""" + with self._lock: + self._obs_slot = None + self._running_inference = False + logger.warning("[InferenceThread] Force reset — cleared stuck state") + + def get_result(self) -> tuple[np.ndarray, float] | None: + """Non-blocking fetch of the latest result. Returns (chunk, latency_s) or None.""" + with self._result_lock: + r = self._result_slot + self._result_slot = None + return r + + def _build_torch_observation(self, obs_dict: dict) -> object: + """Convert raw observation dict to Pi05 Observation on device (torch).""" + import torch + from physicalai.data.observation import Observation + + state = torch.tensor( + [[obs_dict[f"{jn}.pos"] for jn in JOINT_NAMES]], + dtype=torch.float32, + device=self.device, + ) + + images = {} + for cam_key in [k for k in obs_dict if isinstance(obs_dict[k], np.ndarray) and obs_dict[k].ndim == 3]: + img = np.ascontiguousarray(obs_dict[cam_key]) + if cam_key in self.flip_cameras: + img = cv2.rotate(img, cv2.ROTATE_180) + t = torch.from_numpy(img.copy()).float() / 255.0 + t = t.permute(2, 0, 1).unsqueeze(0) + images[cam_key] = t.to(self.device) + + return Observation(state=state, images=images, task=self.task) + + def _build_ov_observation(self, obs_dict: dict) -> dict[str, np.ndarray]: + """Convert raw observation dict to InferenceModel input (numpy).""" + state = np.array([[obs_dict[f"{jn}.pos"] for jn in JOINT_NAMES]], dtype=np.float32) + observation: dict[str, np.ndarray] = {"state": state, "task": [self.task]} + + for cam_key in [k for k in obs_dict if isinstance(obs_dict[k], np.ndarray) and obs_dict[k].ndim == 3]: + img = np.ascontiguousarray(obs_dict[cam_key]) + if cam_key in self.flip_cameras: + img = cv2.rotate(img, cv2.ROTATE_180) + img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT)) + img = img.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 + model_name = self.camera_name_map.get(cam_key, cam_key) + observation[f"images.{model_name}"] = img + + for blank_cam in self.blank_cameras: + observation[f"images.{blank_cam}"] = np.zeros( + (1, 3, IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.float32, + ) + + return observation + + def _infer_torch(self, obs_dict: dict) -> np.ndarray: + """Run Pi05 inference and return the full action chunk as numpy.""" + import torch + from collections import deque + + observation = self._build_torch_observation(obs_dict) + self.model._action_queue = deque() + + with torch.no_grad(): + action = self.model.select_action(observation) + if self.device == "cuda": + torch.cuda.synchronize() + + actions = [action.detach().cpu().numpy()] + while len(self.model._action_queue) > 0: + a = self.model._action_queue.popleft() + actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a) + + return np.array([np.squeeze(a) for a in actions], dtype=np.float32) + + def _infer_ov(self, obs_dict: dict) -> np.ndarray: + """Run OpenVINO inference and return the full action chunk as numpy.""" + model_input = self._build_ov_observation(obs_dict) + output = self.model(model_input)["action"] + chunk = np.squeeze(output) + if chunk.ndim == 1: + chunk = chunk[np.newaxis, :] + return chunk.astype(np.float32) + + def _run(self) -> None: + logger.info("[InferenceThread] Thread started, waiting for observations") + while not self._stop.is_set(): + self._obs_ready.wait() + self._obs_ready.clear() + + if self._stop.is_set(): + break + + with self._lock: + obs_dict = self._obs_slot + self._obs_slot = None + self._running_inference = True + + if obs_dict is None: + with self._lock: + self._running_inference = False + continue + + self.inference_count += 1 + logger.info("[InferenceThread] >>> Inference #%d START", self.inference_count) + t0 = time.perf_counter() + + try: + if self.backend == "torch": + chunk = self._infer_torch(obs_dict) + else: + chunk = self._infer_ov(obs_dict) + except Exception: + logger.exception("[InferenceThread] Inference failed") + with self._lock: + self._running_inference = False + continue + + latency = time.perf_counter() - t0 + logger.info( + "[InferenceThread] <<< Inference #%d DONE latency=%.1fms chunk_shape=%s", + self.inference_count, latency * 1000, chunk.shape, + ) + + with self._result_lock: + self._result_slot = (chunk, latency) + with self._lock: + self._running_inference = False + + +# --------------------------------------------------------------------------- +# Observation helpers +# --------------------------------------------------------------------------- + +def get_full_observation( + robot: SO101Follower, + cameras: dict[str, SharedCamera], +) -> dict: + """Combine robot state + SharedCamera images into one dict. + + LeRobot SO101Follower returns joint values in the correct + model domain (body joints in degrees, gripper in [0, 100]). + """ + obs = robot.get_observation() + for cam_key, cam in cameras.items(): + frame = cam.read_latest() + obs[cam_key] = frame.data + return obs + + +def action_to_robot_dict(action: np.ndarray) -> dict[str, float]: + """Convert action array to a LeRobot send_action dict.""" + return {f"{jn}.pos": float(action[i]) for i, jn in enumerate(JOINT_NAMES)} + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Camera discovery +# --------------------------------------------------------------------------- + +def select_cameras() -> dict[str, str]: + """Discover cameras and interactively ask user to assign overhead/arm.""" + print("\n[cameras] Discovering cameras ...", flush=True) + + all_devices = discover_all() + + # Flatten into a single list (deduplicate across drivers by hardware_id) + seen: set[str] = set() + devices: list[DeviceInfo] = [] + for driver_devices in all_devices.values(): + for dev in driver_devices: + key = dev.hardware_id or f"{dev.driver}:{dev.device_id}" + if key not in seen: + seen.add(key) + devices.append(dev) + + if not devices: + raise RuntimeError("No cameras found. Check connections and permissions.") + + print(f"\n Found {len(devices)} camera(s):", flush=True) + for i, dev in enumerate(devices): + stable_id = dev.hardware_id or "" + print(f" [{i}] {dev.name or dev.model or 'Unknown'} ({stable_id or dev.device_id})", flush=True) + + cameras: dict[str, str] = {} + for role in ("overhead", "arm"): + while True: + choice = input(f"\n Select {role} camera (0-{len(devices)-1}), or 's' to skip: ").strip() + if choice.lower() == "s": + print(f" Skipping {role} camera", flush=True) + break + if choice.isdigit() and 0 <= int(choice) < len(devices): + dev = devices[int(choice)] + # Prefer stable by-id path, fall back to device_id + path = dev.hardware_id if dev.hardware_id else dev.device_id + cameras[role] = path + print(f" {role} → {path}", flush=True) + break + print(f" Invalid choice. Enter 0-{len(devices)-1} or 's'.") + + if not cameras: + raise RuntimeError("At least one camera must be selected.") + + return cameras + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Async threaded SO-101 inference (PyTorch or OpenVINO).") + p.add_argument("--model-path", type=str, required=True, + help="Path to Pi05 checkpoint (.ckpt) or OpenVINO export directory") + p.add_argument("--device", type=str, default="cuda", + help="'cuda' or 'cpu' for torch; 'CPU', 'GPU', 'NPU' for OpenVINO") + p.add_argument("--robot-port", type=str, default="/dev/ttyACM1") + p.add_argument("--robot-id", type=str, default="my_so101_follower") + p.add_argument("--overhead-camera", type=str, default=None, + help="UVC device path for overhead camera (skip interactive selection)") + p.add_argument("--arm-camera", type=str, default=None, + help="UVC device path for arm camera (skip interactive selection)") + p.add_argument("--task", type=str, default="pick a can and place it in a bowl") + p.add_argument("--fps", type=int, default=30) + p.add_argument("--queue-threshold", type=float, default=0.5, + help="Request new inference when queue drops below this fraction of chunk size.") + p.add_argument("--lerp-duration", type=int, default=5, + help="Number of frames over which to blend old/new chunks.") + p.add_argument("--flip-cameras", nargs="*", default=[], + help="Camera names to rotate 180 degrees.") + p.add_argument("--infer-timeout", type=float, default=15.0, + help="Seconds before force-resetting a stuck inference thread.") + p.add_argument("--max-speed", type=float, default=90.0, + help="Max joint speed in degrees/second. Clamps action deltas per step.") + p.add_argument("--ramp-steps", type=int, default=30, + help="Number of initial steps to ramp up speed (starts at 1/4 max-speed).") + p.add_argument("--no-compile", action="store_true", default=True, + help="Disable torch.compile (default: disabled, torch only)") + # OpenVINO-specific options + p.add_argument("--camera-map", nargs="*", + help="Map camera names to model input names, e.g. overhead=top arm=wrist (OV only)") + p.add_argument("--blank-cameras", nargs="*", + help="Model camera inputs to fill with zeros, e.g. side (OV only)") + return p.parse_args() + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(args: argparse.Namespace) -> None: + model_path = Path(args.model_path) + if not model_path.exists(): + raise FileNotFoundError(f"Model not found: {model_path}") + + # Auto-detect backend + if model_path.is_dir(): + backend = "openvino" + # Default device for OpenVINO is CPU when user left the torch default + if args.device in ("cuda", "cuda:0"): + print(f"[init] Warning: OpenVINO backend does not support CUDA. Falling back to CPU.", flush=True) + args.device = "CPU" + else: + args.device = args.device.upper() + elif model_path.suffix == ".ckpt": + backend = "torch" + else: + raise ValueError( + f"Cannot determine backend from '{model_path}'. " + "Use a .ckpt file for PyTorch or a directory for OpenVINO." + ) + + flip_cameras = set(args.flip_cameras or []) + camera_name_map = dict(kv.split("=") for kv in (args.camera_map or [])) + blank_cameras = list(args.blank_cameras or []) + + print(f"[init] Model: {model_path}", flush=True) + print(f"[init] Backend: {backend}", flush=True) + print(f"[init] Device: {args.device}", flush=True) + print(f"[init] Flip cameras: {flip_cameras}", flush=True) + if backend == "openvino": + print(f"[init] Camera map: {camera_name_map}", flush=True) + print(f"[init] Blank cameras: {blank_cameras}", flush=True) + + # --- Set up cameras --- + # Use CLI paths if provided, otherwise run interactive selection + if args.overhead_camera or args.arm_camera: + camera_paths: dict[str, str] = {} + if args.overhead_camera: + camera_paths["overhead"] = args.overhead_camera + if args.arm_camera: + camera_paths["arm"] = args.arm_camera + else: + camera_paths = select_cameras() + + cameras: dict[str, SharedCamera] = {} + for cam_name, cam_device in camera_paths.items(): + cameras[cam_name] = SharedCamera( + "uvc", device=cam_device, + width=IMAGE_WIDTH, height=IMAGE_HEIGHT, fps=CAMERA_FPS, + ) + + for name, cam in cameras.items(): + cam.connect() + print(f"[init] Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps", flush=True) + + # --- Load model --- + if backend == "torch": + import torch + from physicalai.policies.pi05 import Pi05 + + print("[init] Loading Pi05 model (map_location=cpu) ...", flush=True) + model = Pi05.load_from_checkpoint( + str(model_path), map_location="cpu", compile_model=not args.no_compile, + ) + print("[init] Checkpoint loaded to CPU, moving to device ...", flush=True) + model = model.to(args.device) + model.eval() + print(f"[init] Model loaded on {args.device}", flush=True) + if args.device == "cuda": + print(f"[init] VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB", flush=True) + else: + import openvino_tokenizers # noqa: F401 + from physicalai.inference import InferenceModel + from physicalai.inference.runners import SinglePass + + print(f"[init] Loading OpenVINO model from {model_path} ...", flush=True) + model = InferenceModel( + export_dir=str(model_path), + device=args.device, + runner=SinglePass(), + ) + print("[init] Model loaded", flush=True) + + + # --- Connect robot (LeRobot SO101Follower, no cameras) --- + print(f"[init] Connecting robot on {args.robot_port} ...", flush=True) + robot_cfg = SO101FollowerConfig( + port=args.robot_port, + id=args.robot_id, + ) + robot = SO101Follower(robot_cfg) + robot.connect() + print(f"[init] Robot connected (id={args.robot_id})", flush=True) + + # --- Warm-up inference to determine chunk size --- + print("[init] Getting warm-up observation ...", flush=True) + warmup_obs = get_full_observation(robot, cameras) + print(f"[init] Got observation, keys: {list(warmup_obs.keys())}", flush=True) + + print("[init] Running warm-up inference ...", flush=True) + t0 = time.perf_counter() + + if backend == "torch": + import torch + from collections import deque + from physicalai.data.observation import Observation + + state = torch.tensor( + [[warmup_obs[f"{jn}.pos"] for jn in JOINT_NAMES]], + dtype=torch.float32, device=args.device, + ) + warmup_images = {} + for cam_key in cameras: + img = np.ascontiguousarray(warmup_obs[cam_key]) + if cam_key in flip_cameras: + img = cv2.rotate(img, cv2.ROTATE_180) + t = torch.from_numpy(img.copy()).float() / 255.0 + t = t.permute(2, 0, 1).unsqueeze(0).to(args.device) + warmup_images[cam_key] = t + + warmup_observation = Observation(state=state, images=warmup_images, task=args.task) + model._action_queue = deque() + with torch.no_grad(): + warmup_action = model.select_action(warmup_observation) + if args.device == "cuda": + torch.cuda.synchronize() + + warmup_actions = [warmup_action.detach().cpu().numpy()] + while len(model._action_queue) > 0: + a = model._action_queue.popleft() + warmup_actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a) + warmup_chunk = np.array([np.squeeze(a) for a in warmup_actions], dtype=np.float32) + else: + # OV warm-up + warmup_state = np.array( + [[warmup_obs[f"{jn}.pos"] for jn in JOINT_NAMES]], dtype=np.float32, + ) + warmup_input: dict[str, np.ndarray] = {"state": warmup_state, "task": [args.task]} + for cam_key in cameras: + img = np.ascontiguousarray(warmup_obs[cam_key]) + if cam_key in flip_cameras: + img = cv2.rotate(img, cv2.ROTATE_180) + img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT)) + img = img.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 + model_name = camera_name_map.get(cam_key, cam_key) + warmup_input[f"images.{model_name}"] = img + for blank_cam in blank_cameras: + warmup_input[f"images.{blank_cam}"] = np.zeros( + (1, 3, IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.float32, + ) + warmup_out = model(warmup_input)["action"] + warmup_chunk = np.squeeze(warmup_out) + if warmup_chunk.ndim == 1: + warmup_chunk = warmup_chunk[np.newaxis, :] + warmup_chunk = warmup_chunk.astype(np.float32) + + warmup_latency = time.perf_counter() - t0 + chunk_size = warmup_chunk.shape[0] + action_dim = warmup_chunk.shape[1] + print(f"[init] Warm-up done: chunk_size={chunk_size}, action_dim={action_dim}, " + f"latency={warmup_latency*1000:.1f}ms", flush=True) + + # --- Init components --- + queue_mixer = QueueMixer(lerp_duration=args.lerp_duration) + queue_mixer.add(warmup_chunk, offset=0) + + inference_thread = InferenceThread( + model, backend, args.device, args.task, flip_cameras, + camera_name_map=camera_name_map, blank_cameras=blank_cameras, + ) + inference_thread.start() + + goal_time = 1.0 / args.fps + threshold = int(chunk_size * args.queue_threshold) + max_delta_per_step = args.max_speed / args.fps # degrees per step + step = 0 + actions_from_queue = 0 + hold_count = 0 + last_action: np.ndarray = warmup_chunk[0] + + # Initialize commanded position from current robot state (degrees/[0,100]) + init_obs = robot.get_observation() + commanded_pos = np.array( + [init_obs[f"{jn}.pos"] for jn in JOINT_NAMES], dtype=np.float32, + ) + print(f"[init] Current joint pos: {commanded_pos}", flush=True) + print(f"[init] Max speed: {args.max_speed:.0f} deg/s → {max_delta_per_step:.1f} per step @ {args.fps}Hz", flush=True) + print(f"[init] Ramp-up: {args.ramp_steps} steps", flush=True) + print("[run] Starting — Ctrl+C to stop", flush=True) + + try: + while True: + loop_start = time.perf_counter() + + # 1. Check for inference results + result = inference_thread.get_result() + if result is not None: + chunk, latency = result + offset = int(latency * args.fps) + logger.info( + "[Control] Got inference #%d: chunk=%s, latency=%.1fms, offset=%d", + inference_thread.inference_count, chunk.shape, latency * 1000, offset, + ) + queue_mixer.add(chunk, offset=offset) + queue_mixer.lerp_duration = max(offset, 1) + + # 2. Maybe request new inference (with stuck/dead detection) + if queue_mixer.remaining <= threshold: + if not inference_thread.alive: + logger.error("[Control] Inference thread DEAD — restarting") + inference_thread = InferenceThread( + model, backend, args.device, args.task, flip_cameras, + camera_name_map=camera_name_map, blank_cameras=blank_cameras, + ) + inference_thread.start() + elif inference_thread.busy_duration > args.infer_timeout: + logger.warning( + "[Control] Inference stuck for %.0fs — force resetting", + inference_thread.busy_duration, + ) + inference_thread.force_reset() + + if not inference_thread.busy: + obs = get_full_observation(robot, cameras) + submitted = inference_thread.request(obs) + if submitted: + logger.info( + "[Control] Requested inference (queue_remaining=%d, threshold=%d)", + queue_mixer.remaining, threshold, + ) + + # 3. Pop action from queue + action = queue_mixer.pop() + if action is not None: + last_action = action + actions_from_queue += 1 + hold_count = 0 + else: + action = last_action + hold_count += 1 + if hold_count == 1 or hold_count % 30 == 0: + logger.warning( + "[Control] Queue empty, holding position (hold_count=%d)", hold_count, + ) + + # 4. Velocity clamp and send + if step < args.ramp_steps: + ramp_frac = 0.25 + 0.75 * (step / args.ramp_steps) + effective_delta = max_delta_per_step * ramp_frac + else: + effective_delta = max_delta_per_step + + delta = action - commanded_pos + clamped_delta = np.clip(delta, -effective_delta, effective_delta) + commanded_pos = (commanded_pos + clamped_delta).astype(np.float32) + robot.send_action(action_to_robot_dict(commanded_pos)) + + # 5. Timing + elapsed = time.perf_counter() - loop_start + sleep_time = goal_time - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + step += 1 + if step % 100 == 0: + actual_hz = 1.0 / max(time.perf_counter() - loop_start, 1e-6) + logger.info( + "[Control] Step %d | %.1f Hz | queue=%d | from_queue=%d | holds=%d | inferences=%d", + step, actual_hz, queue_mixer.remaining, + actions_from_queue, hold_count, inference_thread.inference_count, + ) + + except KeyboardInterrupt: + logger.info("[Control] Interrupted by user") + finally: + inference_thread.stop() + for name, cam in cameras.items(): + cam.disconnect() + logger.info("Camera '%s' disconnected", name) + robot.disconnect() + logger.info( + "[Control] Cleanup complete — %d steps, %d actions from queue, %d holds, %d inferences", + step, actions_from_queue, hold_count, inference_thread.inference_count, + ) + + +if __name__ == "__main__": + main(parse_args()) diff --git a/examples/runtime/inference_pi05_gpu.ipynb b/examples/runtime/inference_pi05_gpu.ipynb new file mode 100644 index 0000000..fcd656c --- /dev/null +++ b/examples/runtime/inference_pi05_gpu.ipynb @@ -0,0 +1,838 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "68d10b89", + "metadata": {}, + "source": [ + "# Pi05 GPU Inference (PyTorch + CUDA)\n", + "\n", + "Loads Eugene's finetuned Pi05 checkpoint and runs inference on RTX 3090.\n", + "Expected: ~1-5s per action chunk (vs ~90s on CPU with OpenVINO)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf8f6bdd", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import numpy as np\n", + "import cv2\n", + "from collections import deque\n", + "from matplotlib import pyplot as plt\n", + "\n", + "print(f\"CUDA available: {torch.cuda.is_available()}\")\n", + "if torch.cuda.is_available():\n", + " print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n", + " print(f\"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98f8f8c7", + "metadata": {}, + "outputs": [], + "source": [ + "# --- Config ---\n", + "CHECKPOINT_PATH = \"pi05/pi05_eugene.ckpt\"\n", + "DEVICE = \"cuda\" # \"cuda\" or \"cpu\"\n", + "TASK = \"pick a can and place it in a bowl\"\n", + "\n", + "JOINT_NAMES = [\n", + " \"shoulder_pan\",\n", + " \"shoulder_lift\",\n", + " \"elbow_flex\",\n", + " \"wrist_flex\",\n", + " \"wrist_roll\",\n", + " \"gripper\",\n", + "]\n", + "\n", + "# Camera config (SharedCamera UVC device indices)\n", + "OVERHEAD_CAMERA_INDEX = \"/dev/v4l/by-id/usb-UGREEN_Camera_2K_UGREEN_Camera_2K_SN0001-video-index0\"\n", + "ARM_CAMERA_INDEX = \"/dev/v4l/by-id/usb-Innomaker_Innomaker-U20CAM-1080p-S1_SN0001-video-index0\"\n", + "FLIP_CAMERAS = {\"arm\"} # Cameras mounted upside down\n", + "\n", + "CAMERA_WIDTH = 640\n", + "CAMERA_FPS = 30\n", + "CAMERA_HEIGHT = 480" + ] + }, + { + "cell_type": "markdown", + "id": "8f32365d", + "metadata": {}, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dce2df9", + "metadata": {}, + "outputs": [], + "source": [ + "from physicalai.policies.pi05 import Pi05\n", + "\n", + "# Load to CPU first to avoid OOM if GPU has residual allocations\n", + "policy = Pi05.load_from_checkpoint(CHECKPOINT_PATH, map_location=\"cpu\", compile_model=False)\n", + "policy = policy.to(DEVICE)\n", + "policy.eval()\n", + "print(f\"Model loaded on {DEVICE}\")\n", + "print(f\"VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB\")" + ] + }, + { + "cell_type": "markdown", + "id": "ad387d02", + "metadata": {}, + "source": [ + "## Connect Cameras & Robot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e022b604", + "metadata": {}, + "outputs": [], + "source": [ + "from cv2_enumerate_cameras import enumerate_cameras\n", + "\n", + "print(\"Available cameras:\")\n", + "for c in enumerate_cameras(apiPreference=cv2.CAP_GSTREAMER):\n", + " print(f\" - {c.name} (index {c.index})\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8c0a963", + "metadata": {}, + "outputs": [], + "source": [ + "from physicalai.capture import SharedCamera\n", + "from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig\n", + "\n", + "# Cameras via SharedCamera (UVC)\n", + "cameras = {\n", + " \"overhead\": SharedCamera(\"uvc\", device=OVERHEAD_CAMERA_INDEX, width=CAMERA_WIDTH, height=CAMERA_HEIGHT, fps=CAMERA_FPS),\n", + " \"arm\": SharedCamera(\"uvc\", device=ARM_CAMERA_INDEX, width=CAMERA_WIDTH, height=CAMERA_HEIGHT, fps=CAMERA_FPS),\n", + "}\n", + "for name, cam in cameras.items():\n", + " cam.connect()\n", + " print(f\"Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps\")\n", + "\n", + "# Robot without cameras\n", + "robot_cfg = SO101FollowerConfig(\n", + " port=\"/dev/ttyACM1\",\n", + " id=\"my_so101_follower\",\n", + "\n", + ")\n", + "\n", + "robot = SO101Follower(robot_cfg)\n", + "robot.connect()\n", + "print(\"Robot connected\")" + ] + }, + { + "cell_type": "markdown", + "id": "049e73d1", + "metadata": {}, + "source": [ + "## Get Observation & Display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1630af6", + "metadata": {}, + "outputs": [], + "source": [ + "def get_full_observation() -> dict:\n", + " \"\"\"Combine robot state + SharedCamera images into one dict.\"\"\"\n", + " obs = robot.get_observation()\n", + " for cam_key, cam in cameras.items():\n", + " frame = cam.read_latest()\n", + " obs[cam_key] = frame.data # numpy HWC RGB\n", + " return obs\n", + "\n", + "def viz_observation(obs: dict):\n", + " print(\"Joint positions:\")\n", + " for jn in JOINT_NAMES:\n", + " print(f\" {jn}: {obs[f'{jn}.pos']:.4f}\")\n", + "\n", + " fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n", + " for ax, cam_key in zip(axes, [\"overhead\", \"arm\"]):\n", + " img = np.ascontiguousarray(obs[cam_key])\n", + " if cam_key in FLIP_CAMERAS:\n", + " img = cv2.rotate(img, cv2.ROTATE_180)\n", + " ax.imshow(img)\n", + " ax.set_title(f\"{cam_key} ({img.shape})\")\n", + " ax.axis(\"off\")\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "obs = get_full_observation()\n", + "viz_observation(obs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ef87e90", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "c899db7a", + "metadata": {}, + "source": [ + "## Build Observation for Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb2ceb63", + "metadata": {}, + "outputs": [], + "source": [ + "from physicalai.data.observation import Observation\n", + "\n", + "def build_observation(obs: dict) -> Observation:\n", + " \"\"\"Convert robot observation to Pi05 Observation.\"\"\"\n", + " # State: (1, 6) float32\n", + " state = torch.tensor(\n", + " [[obs[f\"{jn}.pos\"] for jn in JOINT_NAMES]], dtype=torch.float32, device=DEVICE\n", + " )\n", + "\n", + " # Images: {name: (1, 3, H, W) float32 in [0, 1]}\n", + " def img_to_tensor(img: np.ndarray) -> torch.Tensor:\n", + " t = torch.from_numpy(img.copy()).float() / 255.0 # (H, W, 3) -> [0,1]\n", + " t = t.permute(2, 0, 1).unsqueeze(0) # (1, 3, H, W)\n", + " return t.to(DEVICE)\n", + "\n", + " images = {}\n", + " for cam_key in [\"overhead\", \"arm\"]:\n", + " img = np.ascontiguousarray(obs[cam_key])\n", + " if cam_key in FLIP_CAMERAS:\n", + " img = cv2.rotate(img, cv2.ROTATE_180)\n", + " images[cam_key] = img_to_tensor(img)\n", + "\n", + " return Observation(state=state, images=images, task=TASK)\n", + "\n", + "observation = build_observation(get_full_observation())\n", + "print(f\"State shape: {observation.state.shape}\")\n", + "for k, v in observation.images.items():\n", + " print(f\"Image '{k}': {v.shape}, device={v.device}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3a0557e9", + "metadata": {}, + "source": [ + "## Run Inference (single step, timed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5167ad02", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "# Reset action queue to get a fresh prediction\n", + "policy._action_queue = deque()\n", + "\n", + "# Warm up GPU\n", + "if DEVICE == \"cuda\":\n", + " torch.cuda.synchronize()\n", + "\n", + "t0 = time.perf_counter()\n", + "with torch.no_grad():\n", + " action = policy.select_action(observation)\n", + "if DEVICE == \"cuda\":\n", + " torch.cuda.synchronize()\n", + "latency = time.perf_counter() - t0\n", + "\n", + "action_np = action.detach().cpu().numpy()\n", + "if action_np.ndim == 2:\n", + " action_np = action_np[0]\n", + "\n", + "print(f\"Inference latency: {latency*1000:.1f} ms\")\n", + "print(f\"Action shape: {action_np.shape}\")\n", + "print(f\"Action: {action_np}\")\n", + "print()\n", + "for i, jn in enumerate(JOINT_NAMES):\n", + " print(f\" {jn}: {action_np[i]:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "130d146d", + "metadata": {}, + "source": [ + "## Benchmark (5 inferences)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a7dd57d", + "metadata": {}, + "outputs": [], + "source": [ + "latencies = []\n", + "for i in range(5):\n", + " policy._action_queue = deque()\n", + " if DEVICE == \"cuda\":\n", + " torch.cuda.synchronize()\n", + " t0 = time.perf_counter()\n", + " with torch.no_grad():\n", + " action = policy.select_action(observation)\n", + " if DEVICE == \"cuda\":\n", + " torch.cuda.synchronize()\n", + " latencies.append(time.perf_counter() - t0)\n", + "\n", + "print(f\"Latencies: {[f'{l*1000:.1f}ms' for l in latencies]}\")\n", + "print(f\"Mean: {np.mean(latencies)*1000:.1f} ms\")\n", + "print(f\"Min: {np.min(latencies)*1000:.1f} ms\")\n", + "print(f\"Max: {np.max(latencies)*1000:.1f} ms\")" + ] + }, + { + "cell_type": "markdown", + "id": "72d136ad", + "metadata": {}, + "source": [ + "## Send Action to Robot\n", + "\n", + "**Run only when ready to move.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21aeb2ac", + "metadata": {}, + "outputs": [], + "source": [ + "action_dict = {f\"{jn}.pos\": float(action_np[i]) for i, jn in enumerate(JOINT_NAMES)}\n", + "robot.send_action(action_dict)\n", + "print(f\"Sent: {action_dict}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f7084ab5", + "metadata": {}, + "source": [ + "## Rollout Loop (observe → infer → execute full chunk)\n", + "\n", + "Runs a continuous loop: get observation, predict action chunk, execute all actions in the chunk at 30Hz, then repeat. **Interrupt the kernel to stop.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66acc85f", + "metadata": {}, + "outputs": [], + "source": [ + "# FPS = 30\n", + "# CHUNK_SIZE = 50 # Pi05 default action chunk length\n", + "\n", + "# try:\n", + "# step = 0\n", + "# while True:\n", + "# # Get fresh observation\n", + "# obs = get_full_observation()\n", + "# observation = build_observation(obs)\n", + "\n", + "# # Generate full action chunk\n", + "# policy._action_queue = deque()\n", + "# t0 = time.perf_counter()\n", + "# with torch.no_grad():\n", + "# action = policy.select_action(observation)\n", + "# infer_ms = (time.perf_counter() - t0) * 1000\n", + "\n", + "# # Drain remaining actions from the queue (select_action returns first, rest are queued)\n", + "# actions = [action.detach().cpu().numpy()]\n", + "# while len(policy._action_queue) > 0:\n", + "# a = policy._action_queue.popleft()\n", + "# actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", + "\n", + "# print(f\"[step {step}] Inferred {len(actions)} actions in {infer_ms:.0f}ms\")\n", + "\n", + "# # Execute each action in the chunk at target FPS\n", + "# for i, act in enumerate(actions):\n", + "# t_act = time.perf_counter()\n", + "# act = np.squeeze(act)\n", + "# action_dict = {f\"{jn}.pos\": float(act[j]) for j, jn in enumerate(JOINT_NAMES)}\n", + "# robot.send_action(action_dict)\n", + "\n", + "# # Sleep to maintain target FPS\n", + "# elapsed = time.perf_counter() - t_act\n", + "# sleep_time = (1.0 / FPS) - elapsed\n", + "# if sleep_time > 0:\n", + "# time.sleep(sleep_time)\n", + "\n", + "# step += 1\n", + "\n", + "# except KeyboardInterrupt:\n", + "# print(f\"\\nStopped after {step} chunks\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd3ce7cc", + "metadata": {}, + "source": [ + "## Async Threaded Rollout\n", + "\n", + "Background thread runs GPU inference while the main thread sends actions at 30Hz.\n", + "Lerp-blends overlapping chunks for smooth transitions. **Interrupt the kernel to stop.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d054a13", + "metadata": {}, + "outputs": [], + "source": [ + "import threading\n", + "\n", + "class QueueMixer:\n", + " \"\"\"Action queue with lerp blending between chunks.\"\"\"\n", + "\n", + " def __init__(self, lerp_duration: int = 5):\n", + " self.queue: np.ndarray | None = None\n", + " self.lerp_duration = lerp_duration\n", + " self.index = 0\n", + "\n", + " def add(self, chunk: np.ndarray, offset: int = 0):\n", + " if self.queue is None or self.index >= len(self.queue):\n", + " self.queue = chunk[offset:]\n", + " self.index = 0\n", + " return\n", + "\n", + " remaining = self.queue[self.index:]\n", + " incoming = chunk[offset:]\n", + " n_remain = len(remaining)\n", + " lerp_dur = min(n_remain, self.lerp_duration)\n", + "\n", + " weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0)[:, np.newaxis]\n", + " n_blend = min(n_remain, len(incoming))\n", + " blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend]\n", + " self.queue = np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32)\n", + " self.index = 0\n", + "\n", + " def pop(self) -> np.ndarray | None:\n", + " if self.queue is None or self.index >= len(self.queue):\n", + " return None\n", + " action = self.queue[self.index]\n", + " self.index += 1\n", + " return action\n", + "\n", + " @property\n", + " def remaining(self) -> int:\n", + " if self.queue is None:\n", + " return 0\n", + " return max(len(self.queue) - self.index, 0)\n", + "\n", + "\n", + "class GpuInferenceThread:\n", + " \"\"\"Background thread that runs PyTorch GPU inference on demand.\"\"\"\n", + "\n", + " def __init__(self, policy, device: str):\n", + " self.policy = policy\n", + " self.device = device\n", + " self._lock = threading.Lock()\n", + " self._obs_slot: dict | None = None\n", + " self._obs_ready = threading.Event()\n", + " self._running_inference = False\n", + " self._request_time = 0.0\n", + " self._result_lock = threading.Lock()\n", + " self._result_slot: tuple[np.ndarray, float] | None = None\n", + " self._stop = threading.Event()\n", + " self._thread = threading.Thread(target=self._run, name=\"GpuInferenceThread\", daemon=True)\n", + " self.inference_count = 0\n", + "\n", + " def start(self):\n", + " self._thread.start()\n", + "\n", + " def stop(self):\n", + " self._stop.set()\n", + " self._obs_ready.set()\n", + " self._thread.join(timeout=10.0)\n", + "\n", + " def request(self, obs_dict: dict) -> bool:\n", + " with self._lock:\n", + " if self._obs_slot is not None or self._running_inference:\n", + " return False\n", + " self._obs_slot = obs_dict\n", + " self._request_time = time.perf_counter()\n", + " self._obs_ready.set()\n", + " return True\n", + "\n", + " @property\n", + " def busy(self) -> bool:\n", + " with self._lock:\n", + " return self._obs_slot is not None or self._running_inference\n", + "\n", + " @property\n", + " def alive(self) -> bool:\n", + " return self._thread.is_alive()\n", + "\n", + " @property\n", + " def busy_duration(self) -> float:\n", + " \"\"\"Seconds since last request was submitted. 0 if not busy.\"\"\"\n", + " with self._lock:\n", + " if not (self._obs_slot is not None or self._running_inference):\n", + " return 0.0\n", + " return time.perf_counter() - self._request_time\n", + "\n", + " def force_reset(self):\n", + " \"\"\"Clear stuck state so new requests can be submitted.\"\"\"\n", + " with self._lock:\n", + " self._obs_slot = None\n", + " self._running_inference = False\n", + " print(\"[InferenceThread] Force reset — cleared stuck state\")\n", + "\n", + " def get_result(self) -> tuple[np.ndarray, float] | None:\n", + " with self._result_lock:\n", + " r = self._result_slot\n", + " self._result_slot = None\n", + " return r\n", + "\n", + " def _run(self):\n", + " while not self._stop.is_set():\n", + " self._obs_ready.wait()\n", + " self._obs_ready.clear()\n", + " if self._stop.is_set():\n", + " break\n", + "\n", + " with self._lock:\n", + " obs_dict = self._obs_slot\n", + " self._obs_slot = None\n", + " self._running_inference = True\n", + "\n", + " if obs_dict is None:\n", + " with self._lock:\n", + " self._running_inference = False\n", + " continue\n", + "\n", + " self.inference_count += 1\n", + " t0 = time.perf_counter()\n", + "\n", + " try:\n", + " observation = build_observation(obs_dict)\n", + " self.policy._action_queue = deque()\n", + "\n", + " with torch.no_grad():\n", + " action = self.policy.select_action(observation)\n", + " if self.device == \"cuda\":\n", + " torch.cuda.synchronize()\n", + "\n", + " # Collect full chunk\n", + " actions = [action.detach().cpu().numpy()]\n", + " while len(self.policy._action_queue) > 0:\n", + " a = self.policy._action_queue.popleft()\n", + " actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", + "\n", + " chunk = np.array([np.squeeze(a) for a in actions], dtype=np.float32)\n", + " except Exception as e:\n", + " import traceback\n", + " print(f\"[InferenceThread] Error: {e}\")\n", + " traceback.print_exc()\n", + " with self._lock:\n", + " self._running_inference = False\n", + " continue\n", + "\n", + " latency = time.perf_counter() - t0\n", + " with self._result_lock:\n", + " self._result_slot = (chunk, latency)\n", + " with self._lock:\n", + " self._running_inference = False\n", + "\n", + "print(\"QueueMixer and GpuInferenceThread defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664e5887", + "metadata": {}, + "outputs": [], + "source": [ + "ASYNC_FPS = 30\n", + "QUEUE_THRESHOLD_FRAC = 0.5 # Request new inference when queue drops below this fraction\n", + "LERP_DURATION = 5 # Frames over which to blend old/new chunks\n", + "PLOT_INTERVAL = 1 # Seconds between live plot updates\n", + "\n", + "# --- Warm-up to determine chunk size ---\n", + "warmup_obs = get_full_observation()\n", + "warmup_observation = build_observation(warmup_obs)\n", + "policy._action_queue = deque()\n", + "with torch.no_grad():\n", + " warmup_action = policy.select_action(warmup_observation)\n", + "warmup_actions = [warmup_action.detach().cpu().numpy()]\n", + "while len(policy._action_queue) > 0:\n", + " a = policy._action_queue.popleft()\n", + " warmup_actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", + "warmup_chunk = np.array([np.squeeze(a) for a in warmup_actions], dtype=np.float32)\n", + "chunk_size = warmup_chunk.shape[0]\n", + "threshold = int(chunk_size * QUEUE_THRESHOLD_FRAC)\n", + "print(f\"Chunk size: {chunk_size}, threshold: {threshold}\")\n", + "\n", + "# --- Telemetry collector ---\n", + "telemetry = {\n", + " \"t\": [], # wall time per step\n", + " \"actions_sent\": [], # action_array for each action sent\n", + " \"queue_depth\": [], # (t, remaining) per step\n", + " \"infer_requests\": [], # t when inference was requested\n", + " \"infer_completions\": [], # (t, latency_ms, offset)\n", + " \"chunks_received\": [], # (t_received, chunk_array, inference_num)\n", + " \"holds\": [], # t when queue was empty (holding position)\n", + "}\n", + "t_start = time.perf_counter()\n", + "\n", + "# --- Live plotting ---\n", + "import matplotlib.patches as mpatches\n", + "from IPython.display import display\n", + "\n", + "def render_telemetry(telemetry, threshold, chunk_size):\n", + " \"\"\"Render telemetry to a matplotlib figure.\"\"\"\n", + " actions_arr = np.array(telemetry[\"actions_sent\"])\n", + " t_arr = np.array(telemetry[\"t\"])\n", + " if len(t_arr) < 2:\n", + " return None\n", + "\n", + " queue_t, queue_d = zip(*telemetry[\"queue_depth\"])\n", + " n_joints = len(JOINT_NAMES)\n", + " n_chunks = len(telemetry[\"chunks_received\"])\n", + " chunk_cmap = plt.cm.Set2\n", + "\n", + " fig, axes = plt.subplots(n_joints + 1, 1, figsize=(16, 2.5 * (n_joints + 1)), sharex=True)\n", + "\n", + " for j, jn in enumerate(JOINT_NAMES):\n", + " ax = axes[j]\n", + " ax.plot(t_arr, actions_arr[:, j], color=\"black\", linewidth=1.2, zorder=3)\n", + "\n", + " for idx, (t_recv, chunk, _) in enumerate(telemetry[\"chunks_received\"]):\n", + " color = chunk_cmap(idx % 8)\n", + " chunk_t = t_recv + np.arange(len(chunk)) / ASYNC_FPS\n", + " ax.plot(chunk_t, chunk[:, j], color=color, alpha=0.4, linewidth=1.5, zorder=2)\n", + "\n", + " for t_req in telemetry[\"infer_requests\"]:\n", + " ax.axvline(t_req, color=\"green\", alpha=0.3, linewidth=0.8, linestyle=\"--\")\n", + " for t_comp, _, _ in telemetry[\"infer_completions\"]:\n", + " ax.axvline(t_comp, color=\"blue\", alpha=0.3, linewidth=0.8, linestyle=\"-.\")\n", + " for ht in telemetry[\"holds\"]:\n", + " ax.axvspan(ht, ht + 1/ASYNC_FPS, color=\"red\", alpha=0.15)\n", + "\n", + " ax.set_ylabel(jn, fontsize=9)\n", + " ax.tick_params(labelsize=8)\n", + "\n", + " ax_q = axes[-1]\n", + " ax_q.fill_between(queue_t, queue_d, alpha=0.3, color=\"steelblue\")\n", + " ax_q.plot(queue_t, queue_d, color=\"steelblue\", linewidth=1)\n", + " ax_q.axhline(threshold, color=\"orange\", linestyle=\"--\", linewidth=1, label=f\"threshold ({threshold})\")\n", + " for t_req in telemetry[\"infer_requests\"]:\n", + " ax_q.axvline(t_req, color=\"green\", alpha=0.5, linewidth=1, linestyle=\"--\")\n", + " for t_comp, _, _ in telemetry[\"infer_completions\"]:\n", + " ax_q.axvline(t_comp, color=\"blue\", alpha=0.5, linewidth=1, linestyle=\"-.\")\n", + " ax_q.set_ylabel(\"Queue\", fontsize=9)\n", + " ax_q.set_xlabel(\"Time (s)\", fontsize=10)\n", + " ax_q.legend(loc=\"upper right\", fontsize=8)\n", + " ax_q.tick_params(labelsize=8)\n", + "\n", + " legend_handles = [\n", + " mpatches.Patch(color=\"black\", label=\"Sent to robot\"),\n", + " mpatches.Patch(color=\"green\", alpha=0.5, label=\"Infer requested\"),\n", + " mpatches.Patch(color=\"blue\", alpha=0.5, label=\"Chunk received\"),\n", + " mpatches.Patch(color=\"red\", alpha=0.3, label=\"Hold\"),\n", + " ]\n", + " for idx in range(min(n_chunks, 6)):\n", + " legend_handles.append(mpatches.Patch(color=chunk_cmap(idx % 8), alpha=0.5, label=f\"Chunk #{idx+1}\"))\n", + " fig.legend(handles=legend_handles, loc=\"upper center\", ncol=min(len(legend_handles), 5), fontsize=8, bbox_to_anchor=(0.5, 1.02))\n", + "\n", + " n_holds = len(telemetry[\"holds\"])\n", + " fig.suptitle(f\"Live — {len(t_arr)} steps, {n_chunks} inferences, {n_holds} holds, t={t_arr[-1]:.1f}s\", fontsize=11, y=1.04)\n", + " plt.tight_layout()\n", + " return fig\n", + "\n", + "# Show initial empty plot placeholder\n", + "plot_handle = display(plt.figure(figsize=(16, 2)), display_id=True)\n", + "plt.close()\n", + "\n", + "# --- Init ---\n", + "queue = QueueMixer(lerp_duration=LERP_DURATION)\n", + "queue.add(warmup_chunk) # Seed so robot can move immediately\n", + "\n", + "infer_thread = GpuInferenceThread(policy, DEVICE)\n", + "infer_thread.start()\n", + "\n", + "goal_time = 1.0 / ASYNC_FPS\n", + "last_action = warmup_chunk[0]\n", + "step = 0\n", + "hold_count = 0\n", + "last_plot_time = 0.0\n", + "\n", + "print(f\"Async rollout at {ASYNC_FPS} Hz — interrupt kernel to stop\")\n", + "\n", + "try:\n", + " while True:\n", + " loop_start = time.perf_counter()\n", + " t_rel = loop_start - t_start\n", + "\n", + " # 1. Check for inference result\n", + " result = infer_thread.get_result()\n", + " if result is not None:\n", + " chunk, latency = result\n", + " offset = int(latency * ASYNC_FPS)\n", + " queue.add(chunk, offset=offset)\n", + " queue.lerp_duration = max(offset, 1)\n", + " telemetry[\"infer_completions\"].append((t_rel, latency * 1000, offset))\n", + " telemetry[\"chunks_received\"].append((t_rel, chunk.copy(), infer_thread.inference_count))\n", + " print(f\"[t={t_rel:.1f}s] <<< inference #{infer_thread.inference_count}: \"\n", + " f\"{latency*1000:.0f}ms, offset={offset}, queue={queue.remaining}\")\n", + "\n", + " # 2. Request new inference when queue is low (with stuck/dead detection)\n", + " INFER_TIMEOUT = 15.0\n", + " if queue.remaining <= threshold:\n", + " if not infer_thread.alive:\n", + " print(f\"[t={t_rel:.1f}s] !!! Inference thread DEAD — restarting\")\n", + " infer_thread = GpuInferenceThread(policy, DEVICE)\n", + " infer_thread.start()\n", + " elif infer_thread.busy_duration > INFER_TIMEOUT:\n", + " print(f\"[t={t_rel:.1f}s] !!! Inference stuck for {infer_thread.busy_duration:.0f}s — force resetting\")\n", + " infer_thread.force_reset()\n", + "\n", + " if not infer_thread.busy:\n", + " obs = get_full_observation()\n", + " submitted = infer_thread.request(obs)\n", + " if submitted:\n", + " telemetry[\"infer_requests\"].append(t_rel)\n", + " print(f\"[t={t_rel:.1f}s] >>> inference requested (queue={queue.remaining}/{chunk_size})\")\n", + "\n", + " # 3. Pop action\n", + " action = queue.pop()\n", + " if action is not None:\n", + " last_action = action\n", + " hold_count = 0\n", + " else:\n", + " action = last_action\n", + " hold_count += 1\n", + " telemetry[\"holds\"].append(t_rel)\n", + " if hold_count == 1 or hold_count % 30 == 0:\n", + " print(f\"[t={t_rel:.1f}s] !!! Queue empty, holding (hold={hold_count})\")\n", + "\n", + " # 4. Send to robot\n", + " if action is not None:\n", + " action_dict = {f\"{jn}.pos\": float(action[i]) for i, jn in enumerate(JOINT_NAMES)}\n", + " robot.send_action(action_dict)\n", + "\n", + " # 5. Record telemetry\n", + " telemetry[\"t\"].append(t_rel)\n", + " telemetry[\"actions_sent\"].append(action.copy() if action is not None else last_action.copy())\n", + " telemetry[\"queue_depth\"].append((t_rel, queue.remaining))\n", + "\n", + " # 6. Live plot update (throttled)\n", + " if t_rel - last_plot_time > PLOT_INTERVAL and len(telemetry[\"t\"]) > 10:\n", + " fig = render_telemetry(telemetry, threshold, chunk_size)\n", + " if fig is not None:\n", + " plot_handle.update(fig)\n", + " plt.close(fig)\n", + " last_plot_time = t_rel\n", + "\n", + " # 7. Maintain FPS\n", + " elapsed = time.perf_counter() - loop_start\n", + " sleep_time = goal_time - elapsed\n", + " if sleep_time > 0:\n", + " time.sleep(sleep_time)\n", + "\n", + " step += 1\n", + "\n", + "except KeyboardInterrupt:\n", + " print(f\"\\nStopped after {step} steps, {infer_thread.inference_count} inferences, \"\n", + " f\"{len(telemetry['holds'])} holds, duration={time.perf_counter() - t_start:.1f}s\")\n", + "\n", + "infer_thread.stop()\n", + "print(\"Inference thread stopped\")\n", + "\n", + "# Final plot\n", + "fig = render_telemetry(telemetry, threshold, chunk_size)\n", + "if fig is not None:\n", + " plot_handle.update(fig)\n", + " plt.close(fig)\n", + "\n", + "if telemetry[\"infer_completions\"]:\n", + " latencies_ms = [lat for _, lat, _ in telemetry[\"infer_completions\"]]\n", + " t_arr = np.array(telemetry[\"t\"])\n", + " print(f\"\\nInference latencies: mean={np.mean(latencies_ms):.0f}ms, \"\n", + " f\"min={np.min(latencies_ms):.0f}ms, max={np.max(latencies_ms):.0f}ms\")\n", + " print(f\"Queue empty fraction: {len(telemetry['holds'])/max(len(t_arr),1)*100:.1f}%\")\n", + " print(f\"Total duration: {t_arr[-1]:.1f}s, steps: {len(t_arr)}, effective Hz: {len(t_arr)/t_arr[-1]:.1f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b0e2a88d", + "metadata": {}, + "source": [ + "## Disconnect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6eb8f97", + "metadata": {}, + "outputs": [], + "source": [ + "for name, cam in cameras.items():\n", + " cam.disconnect()\n", + "\n", + " print(f\"Camera '{name}' disconnected\")\n", + " \n", + "robot.disconnect()\n", + "print(\"Robot disconnected\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "physical-ai-studio (3.12.3)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/physicalai/runtime/__init__.py b/src/physicalai/runtime/__init__.py new file mode 100644 index 0000000..4be467a --- /dev/null +++ b/src/physicalai/runtime/__init__.py @@ -0,0 +1,43 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Runtime system for running trained policies on robot hardware. + +Public API:: + + from physicalai.runtime import PolicyRuntime, RunStats, RuntimeCallback + from physicalai.runtime import SyncExecution, AsyncExecution, Execution, WorkerDiedError + from physicalai.runtime import ActionQueue + from physicalai.runtime import ChunkSmoother, LerpSmoother, ReplaceSmoother + from physicalai.runtime import default_observation_to_input +""" + +from physicalai.runtime._action_queue import ActionQueue # noqa: PLC2701 +from physicalai.runtime.execution import ( + AsyncExecution, + Execution, + SyncExecution, + WorkerDiedError, +) +from physicalai.runtime.runtime import ( + PolicyRuntime, + RunStats, + RuntimeCallback, + default_observation_to_input, +) +from physicalai.runtime.smoothers import ChunkSmoother, LerpSmoother, ReplaceSmoother + +__all__ = [ + "ActionQueue", + "AsyncExecution", + "ChunkSmoother", + "Execution", + "LerpSmoother", + "PolicyRuntime", + "ReplaceSmoother", + "RunStats", + "RuntimeCallback", + "SyncExecution", + "WorkerDiedError", + "default_observation_to_input", +] diff --git a/src/physicalai/runtime/_action_queue.py b/src/physicalai/runtime/_action_queue.py new file mode 100644 index 0000000..af768a6 --- /dev/null +++ b/src/physicalai/runtime/_action_queue.py @@ -0,0 +1,72 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import threading +from collections import deque + +import numpy as np + +from physicalai.runtime.smoothers import ChunkSmoother, ReplaceSmoother + + +class ActionQueue: + """Thread-safe action queue with chunk smoothing.""" + + def __init__(self, smoother: ChunkSmoother | None = None) -> None: + self._smoother = smoother or ReplaceSmoother() + self._deque: deque[np.ndarray] = deque() + self._lock = threading.Lock() + self._consecutive_holds = 0 + self._total_holds = 0 + self._total_pops = 0 + + def push_chunk(self, chunk: np.ndarray, offset: int = 0) -> None: + """Push an action chunk, blending with remaining actions via the smoother.""" + with self._lock: + remaining = np.stack(list(self._deque)) if self._deque else np.empty((0, chunk.shape[1]), dtype=chunk.dtype) + merged = self._smoother.merge(remaining, chunk, offset) + self._deque.clear() + self._deque.extend(merged) + + def pop(self) -> np.ndarray | None: + """Pop the next action. + + Returns: + Single action vector, or None if empty. + """ + with self._lock: + if not self._deque: + self._consecutive_holds += 1 + self._total_holds += 1 + return None + self._consecutive_holds = 0 + self._total_pops += 1 + return self._deque.popleft() + + @property + def remaining(self) -> int: + with self._lock: + return len(self._deque) + + @property + def consecutive_holds(self) -> int: + return self._consecutive_holds + + @property + def total_holds(self) -> int: + return self._total_holds + + @property + def total_pops(self) -> int: + return self._total_pops + + def below_threshold(self, threshold: int) -> bool: + with self._lock: + return len(self._deque) < threshold + + def clear(self) -> None: + with self._lock: + self._deque.clear() + self._consecutive_holds = 0 diff --git a/src/physicalai/runtime/execution.py b/src/physicalai/runtime/execution.py new file mode 100644 index 0000000..28ec86c --- /dev/null +++ b/src/physicalai/runtime/execution.py @@ -0,0 +1,250 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Execution strategies for scheduling policy inference.""" + +from __future__ import annotations + +import logging +import threading +import time +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from physicalai.inference.model import InferenceModel + from physicalai.runtime._action_queue import ActionQueue + +logger = logging.getLogger(__name__) + +_NOT_STARTED = "start() must be called before this method" + + +class WorkerDiedError(RuntimeError): + """Raised when the inference worker thread dies unexpectedly.""" + + +class Execution(ABC): + """Decides when and where inference runs. Pushes results into ActionQueue.""" + + @abstractmethod + def start(self, model: InferenceModel, action_queue: ActionQueue) -> None: + """Bind to model and queue. Called once before the loop.""" + ... + + @abstractmethod + def maybe_request(self, observation: dict[str, np.ndarray]) -> None: + """Check if new inference is needed. If so, run or schedule it.""" + ... + + @abstractmethod + def warmup(self, sample_observation: dict[str, np.ndarray]) -> None: + """Run one inference to discover chunk_size and seed the queue.""" + ... + + @abstractmethod + def stop(self) -> None: + """Stop scheduling.""" + ... + + @property + @abstractmethod + def chunk_size(self) -> int: + """Discovered after warmup().""" + ... + + +class SyncExecution(Execution): + """Synchronous inference in the control thread.""" + + def __init__(self) -> None: # noqa: D107 + self._model: InferenceModel | None = None + self._queue: ActionQueue | None = None + self._chunk_size: int = 0 + + def start(self, model: InferenceModel, action_queue: ActionQueue) -> None: + """Bind model and queue.""" + self._model = model + self._queue = action_queue + + def warmup(self, sample_observation: dict[str, np.ndarray]) -> None: + """Run one inference, seed queue, discover chunk_size. + + Raises: + RuntimeError: If start() has not been called. + """ + if self._model is None or self._queue is None: + raise RuntimeError(_NOT_STARTED) + actions = self._model.predict_action_chunk(sample_observation) + self._chunk_size = actions.shape[0] + self._queue.push_chunk(actions, offset=0) + + def maybe_request(self, observation: dict[str, np.ndarray]) -> None: + """Refill queue synchronously when empty. + + Raises: + RuntimeError: If start() has not been called. + """ + if self._model is None or self._queue is None: + raise RuntimeError(_NOT_STARTED) + if self._queue.below_threshold(1): + actions = self._model.predict_action_chunk(observation) + self._queue.push_chunk(actions, offset=0) + + def stop(self) -> None: + """No-op for synchronous execution.""" + + @property + def chunk_size(self) -> int: + """Return discovered chunk size.""" + return self._chunk_size + + +class AsyncExecution(Execution): + """Async inference in a background thread with health monitoring.""" + + def __init__( # noqa: D107 + self, + threshold: float = 0.5, + fps: int = 30, + watchdog_timeout_s: float = 30.0, + max_consecutive_holds: int | None = None, + ) -> None: + self._threshold_frac = threshold + self._fps = fps + self._watchdog_timeout_s = watchdog_timeout_s + self._max_consecutive_holds = max_consecutive_holds or 3 * fps + + self._model: InferenceModel | None = None + self._queue: ActionQueue | None = None + self._chunk_size: int = 0 + self._threshold_count: int = 0 + + self._lock = threading.Lock() + self._obs_slot: dict[str, np.ndarray] | None = None + self._obs_ready = threading.Event() + self._running_inference = False + self._request_time: float = 0.0 + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + self._death_cause: BaseException | None = None + self._inference_count: int = 0 + + def start(self, model: InferenceModel, action_queue: ActionQueue) -> None: + """Bind model/queue and spawn inference thread.""" + self._model = model + self._queue = action_queue + self._thread = threading.Thread(target=self._run, name="InferenceThread", daemon=True) + self._thread.start() + + def warmup(self, sample_observation: dict[str, np.ndarray]) -> None: + """Run one inference in main thread, seed queue, discover chunk_size. + + Raises: + RuntimeError: If start() has not been called. + """ + if self._model is None or self._queue is None: + raise RuntimeError(_NOT_STARTED) + actions = self._model.predict_action_chunk(sample_observation) + self._chunk_size = actions.shape[0] + self._threshold_count = int(self._chunk_size * self._threshold_frac) + self._queue.push_chunk(actions, offset=0) + + def maybe_request(self, observation: dict[str, np.ndarray]) -> None: + """Submit observation for background inference if queue is low and worker idle. + + Raises: + RuntimeError: If start() has not been called. + WorkerDiedError: If the inference thread has died. + """ + if self._queue is None: + raise RuntimeError(_NOT_STARTED) + if self._thread is not None and not self._thread.is_alive() and self._death_cause is not None: + msg = f"Inference thread died: {self._death_cause}" + raise WorkerDiedError(msg) from self._death_cause + + if self._busy_duration > self._watchdog_timeout_s: + logger.warning("Inference stuck for %.0fs — force resetting", self._busy_duration) + self._force_reset() + + if self._queue.below_threshold(self._threshold_count) and not self._busy: + snapshot = {k: v.copy() if isinstance(v, np.ndarray) else v for k, v in observation.items()} + with self._lock: + self._obs_slot = snapshot + self._request_time = time.perf_counter() + self._obs_ready.set() + + def stop(self) -> None: + """Signal thread and join with timeout.""" + if self._thread is not None: + self._stop_event.set() + self._obs_ready.set() + self._thread.join(timeout=10.0) + + @property + def chunk_size(self) -> int: + """Return discovered chunk size.""" + return self._chunk_size + + @property + def alive(self) -> bool: + """Whether the inference thread is alive.""" + return self._thread is not None and self._thread.is_alive() + + @property + def inference_count(self) -> int: + """Number of completed inference calls.""" + return self._inference_count + + @property + def _busy(self) -> bool: + with self._lock: + return self._obs_slot is not None or self._running_inference + + @property + def _busy_duration(self) -> float: + with self._lock: + if not (self._obs_slot is not None or self._running_inference): + return 0.0 + return time.perf_counter() - self._request_time + + def _force_reset(self) -> None: + with self._lock: + self._obs_slot = None + self._running_inference = False + logger.warning("Force reset — cleared stuck inference state") + + def _run(self) -> None: + try: + while not self._stop_event.is_set(): + self._obs_ready.wait() + self._obs_ready.clear() + + if self._stop_event.is_set(): + return + + with self._lock: + obs = self._obs_slot + self._obs_slot = None + if obs is None: + continue + self._running_inference = True + + if self._model is None or self._queue is None: + raise RuntimeError(_NOT_STARTED) # noqa: TRY301 + t0 = time.perf_counter() + actions = self._model.predict_action_chunk(obs) + latency = time.perf_counter() - t0 + + offset = int(latency * self._fps) + self._queue.push_chunk(actions, offset=offset) + self._inference_count += 1 + + with self._lock: + self._running_inference = False + + except Exception as e: + self._death_cause = e + logger.exception("Inference thread died") diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py new file mode 100644 index 0000000..bb92225 --- /dev/null +++ b/src/physicalai/runtime/runtime.py @@ -0,0 +1,226 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""PolicyRuntime — runs a trained policy on robot hardware.""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Protocol + +import numpy as np + +from physicalai.runtime._action_queue import ActionQueue # noqa: PLC2701 +from physicalai.runtime.execution import Execution, WorkerDiedError +from physicalai.runtime.smoothers import LerpSmoother + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, Sequence + + from physicalai.capture.camera import Camera + from physicalai.capture.frame import Frame + from physicalai.inference.model import InferenceModel + from physicalai.robot.interface import Robot, RobotObservation + +logger = logging.getLogger(__name__) + +_DEFAULT_LERP_FRAMES = 5 + + +def default_observation_to_input( + robot_obs: RobotObservation, + camera_frames: dict[str, Frame], +) -> dict[str, Any]: + """Convert robot observation and camera frames to model input dict. + + Maps: + - ``robot_obs.joint_positions`` → ``"state"`` (as batch dim) + - ``frame.data`` per camera → ``"images.{name}"`` + + Returns: + Model input dictionary. + """ + model_input: dict[str, Any] = {} + + if robot_obs.joint_positions is not None: + model_input["state"] = np.array([robot_obs.joint_positions], dtype=np.float32) + + for name, frame in camera_frames.items(): + model_input[f"images.{name}"] = frame.data + + return model_input + + +class RuntimeCallback(Protocol): + """Optional hook points in the PolicyRuntime control loop.""" + + def before_send_action(self, *, action: np.ndarray, step: int) -> np.ndarray | None: + """Called before sending action. Return modified action or None.""" + ... + + def on_action_sent(self, *, action: np.ndarray, step: int) -> None: + """Called after action is sent to robot.""" + ... + + def on_hold(self, *, step: int, holds: int) -> None: + """Called when action queue is empty and robot holds last position.""" + ... + + +@dataclass(frozen=True) +class RunStats: + """Statistics from a PolicyRuntime.run() session.""" + + steps: int + total_pops: int + total_holds: int + inference_count: int + transient_errors: int = 0 + stale_obs_ticks: int = 0 + + +class PolicyRuntime: + """Runs a policy on robot hardware. + + Loop: observe → maybe_request → pop → send → sleep. + Robot and cameras must be connected before run(). Caller owns lifecycle. + """ + + def __init__( # noqa: D107 + self, + robot: Robot, + model: InferenceModel, + execution: Execution, + fps: float, + cameras: Mapping[str, Camera] | None = None, + action_queue: ActionQueue | None = None, + obs_to_input: Callable[[RobotObservation, dict[str, Frame]], dict[str, Any]] | None = None, + callbacks: Sequence[RuntimeCallback] = (), + ) -> None: + if fps <= 0: + msg = f"fps must be positive, got {fps}" + raise ValueError(msg) + self._robot = robot + self._model = model + self._execution = execution + self._fps = fps + self._cameras: Mapping[str, Camera] = cameras or {} + self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=_DEFAULT_LERP_FRAMES)) + self._obs_to_input = obs_to_input or default_observation_to_input + self._callbacks = list(callbacks) + + def run(self, *, duration_s: float | None = None) -> RunStats: + """Run the control loop. + + Args: + duration_s: Maximum duration in seconds. None runs indefinitely. + + Returns: + Statistics from the run session. + + Raises: + WorkerDiedError: If the inference worker thread dies. + """ + self._execution.start(self._model, self._action_queue) + sample_obs = self._build_model_input() + self._execution.warmup(sample_obs) + + goal_time = 1.0 / self._fps + step = 0 + last_action: np.ndarray | None = None + + try: + while True: + if duration_s is not None and step * goal_time >= duration_s: + break + + loop_start = time.perf_counter() + + obs = self._build_model_input() + self._execution.maybe_request(obs) + + action = self._action_queue.pop() + if action is not None: + last_action = action + else: + action = last_action + holds = self._action_queue.consecutive_holds + if holds == 1: + logger.warning("Queue empty — holding position") + elif self._fps > 0 and holds % int(self._fps) == 0: + logger.warning( + "Queue starvation: %d consecutive holds (%.1fs)", + holds, + holds / self._fps, + ) + self._invoke_callback("on_hold", step=step, holds=holds) + + if action is None: + logger.error("No action available (warmup may have failed)") + step += 1 + continue + + modified = self._invoke_callback("before_send_action", action=action, step=step) + if modified is not None: + action = modified + + self._robot.send_action(action) + self._invoke_callback("on_action_sent", action=action, step=step) + + elapsed = time.perf_counter() - loop_start + sleep_time = goal_time - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + step += 1 + + except KeyboardInterrupt: + logger.info("Interrupted by user") + except WorkerDiedError: + logger.exception("Worker died during runtime") + raise + finally: + self._shutdown(step) + + return RunStats( + steps=step, + total_pops=self._action_queue.total_pops, + total_holds=self._action_queue.total_holds, + inference_count=getattr(self._execution, "inference_count", 0), + ) + + def _build_model_input(self) -> dict[str, Any]: + robot_obs = self._robot.get_observation() + camera_frames = {name: cam.read_latest() for name, cam in self._cameras.items()} + return self._obs_to_input(robot_obs, camera_frames) + + def _shutdown(self, step: int) -> None: + self._execution.stop() + + remaining = self._action_queue.remaining + drain_limit = min(remaining, int(self._fps)) + for _ in range(drain_limit): + action = self._action_queue.pop() + if action is not None: + self._robot.send_action(action) + time.sleep(1.0 / self._fps) + + logger.info( + "Shutdown complete — %d steps, %d pops, %d holds", + step, + self._action_queue.total_pops, + self._action_queue.total_holds, + ) + + def _invoke_callback(self, method: str, **kwargs: Any) -> Any: # noqa: ANN401 + result = None + for cb in self._callbacks: + fn = getattr(cb, method, None) + if fn is not None: + try: + result = fn(**kwargs) + except Exception: + logger.exception("Callback %s.%s raised", type(cb).__name__, method) + return result diff --git a/src/physicalai/runtime/smoothers.py b/src/physicalai/runtime/smoothers.py new file mode 100644 index 0000000..70d0bd9 --- /dev/null +++ b/src/physicalai/runtime/smoothers.py @@ -0,0 +1,71 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Chunk smoothers for runtime action queues.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import override + +import numpy as np + +_NDIM_2 = 2 +_ERR_2D = "remaining and incoming must be 2D arrays" +_ERR_ACTION_DIM = "remaining and incoming must have the same action_dim" + + +class ChunkSmoother(ABC): + """Merges a new action chunk into remaining actions from the previous chunk.""" + + @abstractmethod + def merge(self, remaining: np.ndarray, incoming: np.ndarray, offset: int = 0) -> np.ndarray: + """Merge a previous remainder with a new incoming chunk.""" + raise NotImplementedError + + +class ReplaceSmoother(ChunkSmoother): + """Replace remaining actions with the incoming tail.""" + + @override + def merge(self, remaining: np.ndarray, incoming: np.ndarray, offset: int = 0) -> np.ndarray: + """Return the incoming chunk after skipping the offset.""" + _validate_inputs(remaining, incoming) + return incoming[offset:] + + +class LerpSmoother(ChunkSmoother): + """Blend overlapping actions and append the incoming tail.""" + + def __init__(self, duration_frames: int = 5) -> None: + """Create a smoother with a fallback lerp window.""" + self.duration_frames = duration_frames + + @override + def merge(self, remaining: np.ndarray, incoming: np.ndarray, offset: int = 0) -> np.ndarray: + """Merge chunks using queue-mixer-style linear interpolation. + + Returns: + The blended action chunk. + """ + _validate_inputs(remaining, incoming) + + lerp_dur = max(offset, 1) if offset > 0 else self.duration_frames + incoming = incoming[offset:] + n_remain = len(remaining) + lerp_dur = min(n_remain, lerp_dur) + + weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0) + weights = weights[:, np.newaxis] + + n_blend = min(n_remain, len(incoming)) + blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend] + + return np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32) + + +def _validate_inputs(remaining: np.ndarray, incoming: np.ndarray) -> None: + if remaining.ndim != _NDIM_2 or incoming.ndim != _NDIM_2: + raise ValueError(_ERR_2D) + if remaining.shape[1] != incoming.shape[1]: + raise ValueError(_ERR_ACTION_DIM) From 8f2b00a55f3529065b62044394117f6f67e5e5ed Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Fri, 15 May 2026 14:25:21 +0200 Subject: [PATCH 02/19] add tests --- tests/unit/runtime/__init__.py | 0 tests/unit/runtime/test_action_queue.py | 163 +++++++++++++ tests/unit/runtime/test_execution.py | 235 +++++++++++++++++++ tests/unit/runtime/test_runtime.py | 289 ++++++++++++++++++++++++ tests/unit/runtime/test_smoothers.py | 126 +++++++++++ 5 files changed, 813 insertions(+) create mode 100644 tests/unit/runtime/__init__.py create mode 100644 tests/unit/runtime/test_action_queue.py create mode 100644 tests/unit/runtime/test_execution.py create mode 100644 tests/unit/runtime/test_runtime.py create mode 100644 tests/unit/runtime/test_smoothers.py diff --git a/tests/unit/runtime/__init__.py b/tests/unit/runtime/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/runtime/test_action_queue.py b/tests/unit/runtime/test_action_queue.py new file mode 100644 index 0000000..ec60296 --- /dev/null +++ b/tests/unit/runtime/test_action_queue.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import threading + +import numpy as np + +from physicalai.runtime._action_queue import ActionQueue +from physicalai.runtime.smoothers import LerpSmoother, ReplaceSmoother + + +class TestActionQueue: + def test_push_pop_roundtrip(self) -> None: + queue = ActionQueue() + chunk = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32) + queue.push_chunk(chunk) + + actions = [queue.pop() for _ in range(3)] + assert all(a is not None for a in actions) + for i, action in enumerate(actions): + np.testing.assert_array_equal(action, chunk[i]) + + def test_pop_empty_returns_none(self) -> None: + queue = ActionQueue() + assert queue.pop() is None + + def test_consecutive_holds_increment_and_reset(self) -> None: + queue = ActionQueue() + queue.pop() + queue.pop() + assert queue.consecutive_holds == 2 + + queue.push_chunk(np.array([[1.0, 2.0]], dtype=np.float32)) + queue.pop() + assert queue.consecutive_holds == 0 + + def test_total_counters(self) -> None: + queue = ActionQueue() + queue.pop() + queue.pop() + queue.push_chunk(np.array([[1.0], [2.0], [3.0]], dtype=np.float32)) + queue.pop() + queue.pop() + queue.pop() + queue.pop() + + assert queue.total_holds == 3 + assert queue.total_pops == 3 + + def test_remaining_property(self) -> None: + queue = ActionQueue() + assert queue.remaining == 0 + + queue.push_chunk(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)) + assert queue.remaining == 2 + + queue.pop() + assert queue.remaining == 1 + + def test_below_threshold(self) -> None: + queue = ActionQueue() + assert queue.below_threshold(1) is True + + queue.push_chunk(np.array([[1.0], [2.0], [3.0]], dtype=np.float32)) + assert queue.below_threshold(4) is True + assert queue.below_threshold(3) is False + assert queue.below_threshold(2) is False + + def test_clear(self) -> None: + queue = ActionQueue() + queue.push_chunk(np.array([[1.0], [2.0]], dtype=np.float32)) + queue.pop() + queue.pop() + queue.pop() + assert queue.consecutive_holds == 1 + + queue.push_chunk(np.array([[3.0], [4.0]], dtype=np.float32)) + queue.clear() + + assert queue.remaining == 0 + assert queue.consecutive_holds == 0 + assert queue.total_holds == 1 + assert queue.total_pops == 2 + + def test_push_with_offset(self) -> None: + queue = ActionQueue() + chunk = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32) + queue.push_chunk(chunk, offset=2) + + assert queue.remaining == 1 + action = queue.pop() + assert action is not None + np.testing.assert_array_equal(action, [5.0, 6.0]) + + def test_default_smoother_is_replace(self) -> None: + queue = ActionQueue() + assert isinstance(queue._smoother, ReplaceSmoother) + + def test_smoother_integration_lerp(self) -> None: + queue = ActionQueue(smoother=LerpSmoother(duration_frames=5)) + first = np.array([[10.0, 10.0], [20.0, 20.0], [30.0, 30.0]], dtype=np.float32) + queue.push_chunk(first) + + second = np.array( + [[100.0, 100.0], [110.0, 110.0], [120.0, 120.0], [130.0, 130.0]], + dtype=np.float32, + ) + queue.push_chunk(second) + + first_action = queue.pop() + assert first_action is not None + assert not np.array_equal(first_action, second[0]), "LerpSmoother should blend, not replace" + + def test_smoother_integration_replace(self) -> None: + queue = ActionQueue(smoother=ReplaceSmoother()) + first = np.array([[1.0], [2.0], [3.0]], dtype=np.float32) + queue.push_chunk(first) + + second = np.array([[10.0], [20.0]], dtype=np.float32) + queue.push_chunk(second) + + assert queue.remaining == 2 + action = queue.pop() + assert action is not None + np.testing.assert_array_equal(action, [10.0]) + + +class TestActionQueueThreadSafety: + def test_concurrent_push_pop(self) -> None: + queue = ActionQueue() + errors: list[Exception] = [] + action_dim = 4 + n_pushes = 100 + chunk_size = 10 + + def pusher() -> None: + try: + for i in range(n_pushes): + chunk = np.full((chunk_size, action_dim), float(i), dtype=np.float32) + queue.push_chunk(chunk) + except Exception as exc: + errors.append(exc) + + def popper(stop_event: threading.Event) -> None: + try: + while not stop_event.is_set(): + queue.pop() + except Exception as exc: + errors.append(exc) + + stop = threading.Event() + push_thread = threading.Thread(target=pusher) + pop_thread = threading.Thread(target=popper, args=(stop,)) + + push_thread.start() + pop_thread.start() + + push_thread.join(timeout=5.0) + stop.set() + pop_thread.join(timeout=5.0) + + assert not push_thread.is_alive(), "Push thread deadlocked" + assert not pop_thread.is_alive(), "Pop thread deadlocked" + assert not errors, f"Thread errors: {errors}" diff --git a/tests/unit/runtime/test_execution.py b/tests/unit/runtime/test_execution.py new file mode 100644 index 0000000..08e354e --- /dev/null +++ b/tests/unit/runtime/test_execution.py @@ -0,0 +1,235 @@ +from __future__ import annotations + +import threading +import time +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from physicalai.runtime._action_queue import ActionQueue +from physicalai.runtime.execution import AsyncExecution, SyncExecution, WorkerDiedError + + +def _make_mock_model(chunk: np.ndarray | None = None) -> MagicMock: + model = MagicMock() + if chunk is None: + chunk = np.random.randn(6, 4).astype(np.float32) + model.predict_action_chunk.return_value = chunk + return model + + +class TestSyncExecution: + def test_warmup_seeds_queue_and_discovers_chunk_size(self) -> None: + chunk = np.random.randn(8, 3).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = SyncExecution() + obs = {"state": np.zeros(3)} + + ex.start(model, queue) + ex.warmup(obs) + + assert ex.chunk_size == 8 + assert queue.remaining == 8 + model.predict_action_chunk.assert_called_once_with(obs) + + def test_maybe_request_refills_when_empty(self) -> None: + chunk = np.random.randn(4, 2).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = SyncExecution() + obs = {"state": np.zeros(2)} + + ex.start(model, queue) + ex.warmup(obs) + + for _ in range(4): + queue.pop() + assert queue.remaining == 0 + + model.predict_action_chunk.reset_mock() + model.predict_action_chunk.return_value = chunk + ex.maybe_request(obs) + + assert queue.remaining == 4 + model.predict_action_chunk.assert_called_once() + + def test_maybe_request_does_not_refill_when_nonempty(self) -> None: + chunk = np.random.randn(4, 2).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = SyncExecution() + obs = {"state": np.zeros(2)} + + ex.start(model, queue) + ex.warmup(obs) + queue.pop() + + model.predict_action_chunk.reset_mock() + ex.maybe_request(obs) + model.predict_action_chunk.assert_not_called() + + def test_stop_is_noop(self) -> None: + ex = SyncExecution() + ex.stop() + + +class TestAsyncExecution: + def test_start_spawns_thread(self) -> None: + model = _make_mock_model() + queue = ActionQueue() + ex = AsyncExecution(fps=10) + + ex.start(model, queue) + assert ex.alive is True + ex.stop() + + def test_warmup_seeds_queue(self) -> None: + chunk = np.random.randn(6, 4).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = AsyncExecution(fps=10) + + ex.start(model, queue) + obs = {"state": np.zeros(4)} + ex.warmup(obs) + + assert ex.chunk_size == 6 + assert queue.remaining == 6 + ex.stop() + + def test_maybe_request_submits_when_below_threshold(self) -> None: + chunk = np.random.randn(10, 2).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = AsyncExecution(threshold=0.5, fps=10) + + ex.start(model, queue) + obs = {"state": np.zeros(2)} + ex.warmup(obs) + + for _ in range(10): + queue.pop() + + model.predict_action_chunk.reset_mock() + model.predict_action_chunk.return_value = chunk + ex.maybe_request(obs) + + time.sleep(0.3) + assert queue.remaining > 0 + ex.stop() + + def test_defensive_copy_of_observation(self) -> None: + chunk = np.random.randn(4, 2).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = AsyncExecution(threshold=0.5, fps=10) + + ex.start(model, queue) + obs = {"state": np.zeros(2)} + ex.warmup(obs) + for _ in range(4): + queue.pop() + + model.predict_action_chunk.reset_mock() + original_state = np.array([1.0, 2.0]) + obs_to_submit = {"state": original_state.copy()} + ex.maybe_request(obs_to_submit) + obs_to_submit["state"][:] = 99.0 + + time.sleep(0.3) + if model.predict_action_chunk.called: + submitted = model.predict_action_chunk.call_args[0][0]["state"] + np.testing.assert_array_equal(submitted, original_state) + ex.stop() + + def test_worker_death_raises_error(self) -> None: + model = _make_mock_model() + model.predict_action_chunk.side_effect = [ + np.random.randn(4, 2).astype(np.float32), + ValueError("model exploded"), + ] + queue = ActionQueue() + ex = AsyncExecution(threshold=0.5, fps=10) + + ex.start(model, queue) + obs = {"state": np.zeros(2)} + ex.warmup(obs) + + for _ in range(4): + queue.pop() + + ex.maybe_request(obs) + time.sleep(0.5) + + with pytest.raises(WorkerDiedError, match="model exploded"): + ex.maybe_request(obs) + + ex.stop() + + def test_stop_signals_and_joins(self) -> None: + model = _make_mock_model() + queue = ActionQueue() + ex = AsyncExecution(fps=10) + + ex.start(model, queue) + assert ex.alive is True + + ex.stop() + assert ex._thread is not None + assert not ex._thread.is_alive() + + def test_health_properties(self) -> None: + chunk = np.random.randn(4, 2).astype(np.float32) + model = _make_mock_model(chunk) + queue = ActionQueue() + ex = AsyncExecution(fps=10) + + ex.start(model, queue) + obs = {"state": np.zeros(2)} + ex.warmup(obs) + + assert ex.inference_count == 0 + + for _ in range(4): + queue.pop() + + model.predict_action_chunk.reset_mock() + model.predict_action_chunk.return_value = chunk + ex.maybe_request(obs) + time.sleep(0.3) + + assert ex.inference_count >= 1 + ex.stop() + + def test_watchdog_triggers_force_reset(self) -> None: + chunk = np.random.randn(4, 2).astype(np.float32) + model = _make_mock_model(chunk) + + call_count = 0 + original_side_effect = None + + def slow_predict(obs: dict) -> np.ndarray: + nonlocal call_count + call_count += 1 + if call_count == 2: + time.sleep(100) + return chunk + + model.predict_action_chunk.side_effect = slow_predict + queue = ActionQueue() + ex = AsyncExecution(threshold=0.5, fps=10, watchdog_timeout_s=0.1) + + ex.start(model, queue) + obs = {"state": np.zeros(2)} + ex.warmup(obs) + + for _ in range(4): + queue.pop() + ex.maybe_request(obs) + + time.sleep(0.3) + ex.maybe_request(obs) + + ex.stop() diff --git a/tests/unit/runtime/test_runtime.py b/tests/unit/runtime/test_runtime.py new file mode 100644 index 0000000..6f1b378 --- /dev/null +++ b/tests/unit/runtime/test_runtime.py @@ -0,0 +1,289 @@ +# Copyright (C) 2025-2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for physicalai.runtime.runtime.""" + +from __future__ import annotations + +import time +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from physicalai.runtime._action_queue import ActionQueue +from physicalai.runtime.execution import SyncExecution, WorkerDiedError +from physicalai.runtime.runtime import PolicyRuntime, RunStats, default_observation_to_input + +from physicalai.capture import Frame + + +@dataclass +class FakeRobotObservation: + joint_positions: np.ndarray + timestamp: float + sensor_data: dict[str, np.ndarray] | None + images: dict | None + + +def _make_mock_robot(joint_positions: np.ndarray | None = None) -> MagicMock: + robot = MagicMock() + if joint_positions is None: + joint_positions = np.array([0.1, 0.2, 0.3], dtype=np.float32) + robot.get_observation.return_value = FakeRobotObservation( + joint_positions=joint_positions, + timestamp=time.monotonic(), + sensor_data=None, + images=None, + ) + return robot + + +def _make_mock_model(chunk_size: int = 4, action_dim: int = 3) -> MagicMock: + model = MagicMock() + model.predict_action_chunk.return_value = np.random.randn(chunk_size, action_dim).astype(np.float32) + return model + + +def _exhaustible_side_effect( + initial_chunks: list[np.ndarray], + action_dim: int = 2, +) -> Callable[[Any], np.ndarray]: + """Return *initial_chunks* in order, then empty arrays forever. + + Prevents StopIteration when SyncExecution refills more times than + the test expected. + """ + it = iter(initial_chunks) + empty = np.empty((0, action_dim), dtype=np.float32) + return lambda _obs: next(it, empty) + + +class TestPolicyRuntime: + def test_full_loop_with_duration(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model(chunk_size=20, action_dim=3) + execution = SyncExecution() + queue = ActionQueue() + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + action_queue=queue, + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + stats = runtime.run(duration_s=0.5) + + assert stats.steps == 5 + assert robot.send_action.call_count >= 5 + + def test_hold_fallback_when_queue_empty(self) -> None: + robot = _make_mock_robot() + chunk = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) + model = _make_mock_model() + model.predict_action_chunk.side_effect = _exhaustible_side_effect([chunk], action_dim=2) + + execution = SyncExecution() + queue = ActionQueue() + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + action_queue=queue, + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + stats = runtime.run(duration_s=0.4) + + assert stats.steps == 4 + assert robot.send_action.call_count == 4 + + def test_worker_died_error_propagation(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model(chunk_size=4) + + execution = MagicMock() + execution.start = MagicMock() + execution.warmup = MagicMock() + execution.maybe_request.side_effect = WorkerDiedError("dead") + execution.stop = MagicMock() + + queue = ActionQueue() + queue.push_chunk(np.random.randn(4, 3).astype(np.float32)) + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + action_queue=queue, + ) + + with patch("physicalai.runtime.runtime.time") as mock_time, pytest.raises(WorkerDiedError, match="dead"): + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + runtime.run(duration_s=1.0) + + def test_shutdown_does_not_disconnect(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model() + execution = SyncExecution() + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + runtime.run(duration_s=0.1) + + robot.disconnect.assert_not_called() + + +class TestRuntimeCallback: + def test_before_send_action_called(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model(chunk_size=10) + execution = SyncExecution() + callback = MagicMock() + callback.before_send_action.return_value = None + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + callbacks=[callback], + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + runtime.run(duration_s=0.2) + + assert callback.before_send_action.call_count == 2 + + def test_callback_raises_does_not_crash_loop(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model(chunk_size=10) + execution = SyncExecution() + bad_callback = MagicMock() + bad_callback.before_send_action.side_effect = RuntimeError("oops") + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + callbacks=[bad_callback], + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + stats = runtime.run(duration_s=0.3) + + assert stats.steps == 3 + + def test_on_hold_called_when_queue_empty(self) -> None: + robot = _make_mock_robot() + chunk = np.array([[1.0, 2.0]], dtype=np.float32) + model = _make_mock_model() + model.predict_action_chunk.side_effect = _exhaustible_side_effect([chunk], action_dim=2) + + execution = SyncExecution() + callback = MagicMock() + callback.before_send_action.return_value = None + callback.on_hold.return_value = None + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + callbacks=[callback], + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + runtime.run(duration_s=0.3) + + assert callback.on_hold.call_count >= 1 + + +class TestRunStats: + def test_fields_populated(self) -> None: + stats = RunStats(steps=10, total_pops=8, total_holds=2, inference_count=3) + assert stats.steps == 10 + assert stats.total_pops == 8 + assert stats.total_holds == 2 + assert stats.inference_count == 3 + + +class TestDefaultObservationToInput: + def test_extracts_joint_positions(self) -> None: + positions = np.array([0.1, 0.2, 0.3], dtype=np.float32) + obs = FakeRobotObservation( + joint_positions=positions, + timestamp=0.0, + sensor_data=None, + images=None, + ) + + result = default_observation_to_input(obs, {}) + + assert "state" in result + np.testing.assert_array_equal(result["state"], np.array([positions], dtype=np.float32)) + + def test_extracts_frame_data(self) -> None: + obs = FakeRobotObservation( + joint_positions=np.zeros(3), + timestamp=0.0, + sensor_data=None, + images=None, + ) + frame = Frame(data=np.zeros((480, 640, 3), dtype=np.uint8), timestamp=0.0, sequence=0) + + result = default_observation_to_input(obs, camera_frames={"cam0": frame}) + + assert "images.cam0" in result + np.testing.assert_array_equal(result["images.cam0"], frame.data) + + def test_custom_obs_to_input(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model(chunk_size=10) + execution = SyncExecution() + + custom_fn = MagicMock(return_value={"custom_key": np.zeros(3)}) + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + obs_to_input=custom_fn, + ) + + with patch("physicalai.runtime.runtime.time") as mock_time: + mock_time.perf_counter.return_value = 0.0 + mock_time.sleep = MagicMock() + runtime.run(duration_s=0.1) + + assert custom_fn.call_count >= 2 diff --git a/tests/unit/runtime/test_smoothers.py b/tests/unit/runtime/test_smoothers.py new file mode 100644 index 0000000..d5c02b0 --- /dev/null +++ b/tests/unit/runtime/test_smoothers.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import numpy as np +import pytest + +from physicalai.runtime.smoothers import LerpSmoother, ReplaceSmoother + + +class TestReplaceSmoother: + def test_merge_drops_remaining_and_returns_incoming_offset(self) -> None: + smoother = ReplaceSmoother() + remaining = np.array([[1.0, 1.0], [1.0, 1.0]], dtype=np.float32) + incoming = np.array([[2.0, 2.0], [3.0, 3.0], [4.0, 4.0]], dtype=np.float32) + + result = smoother.merge(remaining, incoming, offset=1) + + np.testing.assert_array_equal( + result, + np.array([[3.0, 3.0], [4.0, 4.0]], dtype=np.float32), + ) + + def test_offset_zero_returns_all_incoming(self) -> None: + smoother = ReplaceSmoother() + remaining = np.array([[1.0, 1.0]], dtype=np.float32) + incoming = np.array([[2.0, 2.0], [3.0, 3.0]], dtype=np.float32) + + result = smoother.merge(remaining, incoming, offset=0) + + np.testing.assert_array_equal(result, incoming) + + def test_offset_beyond_incoming_returns_empty_array(self) -> None: + smoother = ReplaceSmoother() + remaining = np.array([[1.0, 1.0]], dtype=np.float32) + incoming = np.array([[2.0, 2.0]], dtype=np.float32) + + result = smoother.merge(remaining, incoming, offset=5) + + assert result.shape == (0, 2) + assert result.dtype == incoming.dtype + + +class TestLerpSmoother: + def test_lerp_weights_match_queue_mixer_formula(self) -> None: + smoother = LerpSmoother(duration_frames=5) + remaining = np.array([[10.0, 10.0], [20.0, 20.0], [30.0, 30.0]], dtype=np.float32) + incoming = np.array( + [[100.0, 100.0], [110.0, 110.0], [120.0, 120.0], [130.0, 130.0]], + dtype=np.float32, + ) + + result = smoother.merge(remaining, incoming, offset=0) + + expected = np.array( + [[10.0, 10.0], [50.0, 50.0], [90.0, 90.0], [130.0, 130.0]], + dtype=np.float32, + ) + np.testing.assert_array_equal(result, expected) + + def test_offset_aware_duration(self) -> None: + smoother = LerpSmoother(duration_frames=99) + remaining = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], dtype=np.float32) + incoming = np.array([[4.0, 4.0], [5.0, 5.0], [6.0, 6.0], [7.0, 7.0]], dtype=np.float32) + + result = smoother.merge(remaining, incoming, offset=2) + + expected = np.array([[1.0, 1.0], [4.5, 4.5]], dtype=np.float32) + np.testing.assert_array_equal(result, expected) + + def test_edge_cases_empty_remaining_single_element_and_offset_beyond_chunk(self) -> None: + smoother = LerpSmoother(duration_frames=5) + + empty_result = smoother.merge( + np.empty((0, 2), dtype=np.float32), + np.array([[1.0, 1.0]], dtype=np.float32), + offset=0, + ) + np.testing.assert_array_equal(empty_result, np.array([[1.0, 1.0]], dtype=np.float32)) + + single_result = smoother.merge( + np.array([[1.0, 1.0]], dtype=np.float32), + np.array([[2.0, 2.0]], dtype=np.float32), + offset=0, + ) + np.testing.assert_array_equal(single_result, np.array([[1.0, 1.0]], dtype=np.float32)) + + offset_beyond_result = smoother.merge( + np.array([[1.0, 1.0]], dtype=np.float32), + np.array([[2.0, 2.0]], dtype=np.float32), + offset=5, + ) + assert offset_beyond_result.shape == (0, 2) + + def test_exact_numerical_blend_values(self) -> None: + smoother = LerpSmoother(duration_frames=5) + remaining = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]], dtype=np.float32) + incoming = np.array( + [[2.0, 2.0], [2.0, 2.0], [2.0, 2.0], [2.0, 2.0]], + dtype=np.float32, + ) + + result = smoother.merge(remaining, incoming, offset=1) + + expected = np.array( + [[1.0, 1.0], [2.0, 2.0], [2.0, 2.0]], + dtype=np.float32, + ) + np.testing.assert_array_equal(result, expected) + + def test_merge_is_stateless_for_same_arguments(self) -> None: + smoother = LerpSmoother(duration_frames=5) + remaining = np.array([[1.0, 1.0], [2.0, 2.0]], dtype=np.float32) + incoming = np.array([[3.0, 3.0], [4.0, 4.0], [5.0, 5.0]], dtype=np.float32) + + first = smoother.merge(remaining, incoming, offset=1) + second = smoother.merge(remaining, incoming, offset=1) + + np.testing.assert_array_equal(first, second) + + +def test_input_validation_mismatched_action_dim_raises_value_error() -> None: + smoother = ReplaceSmoother() + remaining = np.array([[1.0, 1.0]], dtype=np.float32) + incoming = np.array([[2.0, 2.0, 2.0]], dtype=np.float32) + + with pytest.raises(ValueError, match="action_dim"): + smoother.merge(remaining, incoming, offset=0) From ab1bb3141e9324ebb5c9ea222545f3d6269fce30 Mon Sep 17 00:00:00 2001 From: Max X Date: Sun, 17 May 2026 16:09:41 +0200 Subject: [PATCH 03/19] add goal time to send action --- examples/runtime/inference_pi05_gpu.ipynb | 838 ---------------------- src/physicalai/robot/__init__.py | 3 +- src/physicalai/runtime/runtime.py | 3 +- 3 files changed, 4 insertions(+), 840 deletions(-) delete mode 100644 examples/runtime/inference_pi05_gpu.ipynb diff --git a/examples/runtime/inference_pi05_gpu.ipynb b/examples/runtime/inference_pi05_gpu.ipynb deleted file mode 100644 index fcd656c..0000000 --- a/examples/runtime/inference_pi05_gpu.ipynb +++ /dev/null @@ -1,838 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "68d10b89", - "metadata": {}, - "source": [ - "# Pi05 GPU Inference (PyTorch + CUDA)\n", - "\n", - "Loads Eugene's finetuned Pi05 checkpoint and runs inference on RTX 3090.\n", - "Expected: ~1-5s per action chunk (vs ~90s on CPU with OpenVINO)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cf8f6bdd", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import numpy as np\n", - "import cv2\n", - "from collections import deque\n", - "from matplotlib import pyplot as plt\n", - "\n", - "print(f\"CUDA available: {torch.cuda.is_available()}\")\n", - "if torch.cuda.is_available():\n", - " print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n", - " print(f\"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "98f8f8c7", - "metadata": {}, - "outputs": [], - "source": [ - "# --- Config ---\n", - "CHECKPOINT_PATH = \"pi05/pi05_eugene.ckpt\"\n", - "DEVICE = \"cuda\" # \"cuda\" or \"cpu\"\n", - "TASK = \"pick a can and place it in a bowl\"\n", - "\n", - "JOINT_NAMES = [\n", - " \"shoulder_pan\",\n", - " \"shoulder_lift\",\n", - " \"elbow_flex\",\n", - " \"wrist_flex\",\n", - " \"wrist_roll\",\n", - " \"gripper\",\n", - "]\n", - "\n", - "# Camera config (SharedCamera UVC device indices)\n", - "OVERHEAD_CAMERA_INDEX = \"/dev/v4l/by-id/usb-UGREEN_Camera_2K_UGREEN_Camera_2K_SN0001-video-index0\"\n", - "ARM_CAMERA_INDEX = \"/dev/v4l/by-id/usb-Innomaker_Innomaker-U20CAM-1080p-S1_SN0001-video-index0\"\n", - "FLIP_CAMERAS = {\"arm\"} # Cameras mounted upside down\n", - "\n", - "CAMERA_WIDTH = 640\n", - "CAMERA_FPS = 30\n", - "CAMERA_HEIGHT = 480" - ] - }, - { - "cell_type": "markdown", - "id": "8f32365d", - "metadata": {}, - "source": [ - "## Load Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9dce2df9", - "metadata": {}, - "outputs": [], - "source": [ - "from physicalai.policies.pi05 import Pi05\n", - "\n", - "# Load to CPU first to avoid OOM if GPU has residual allocations\n", - "policy = Pi05.load_from_checkpoint(CHECKPOINT_PATH, map_location=\"cpu\", compile_model=False)\n", - "policy = policy.to(DEVICE)\n", - "policy.eval()\n", - "print(f\"Model loaded on {DEVICE}\")\n", - "print(f\"VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB\")" - ] - }, - { - "cell_type": "markdown", - "id": "ad387d02", - "metadata": {}, - "source": [ - "## Connect Cameras & Robot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e022b604", - "metadata": {}, - "outputs": [], - "source": [ - "from cv2_enumerate_cameras import enumerate_cameras\n", - "\n", - "print(\"Available cameras:\")\n", - "for c in enumerate_cameras(apiPreference=cv2.CAP_GSTREAMER):\n", - " print(f\" - {c.name} (index {c.index})\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8c0a963", - "metadata": {}, - "outputs": [], - "source": [ - "from physicalai.capture import SharedCamera\n", - "from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig\n", - "\n", - "# Cameras via SharedCamera (UVC)\n", - "cameras = {\n", - " \"overhead\": SharedCamera(\"uvc\", device=OVERHEAD_CAMERA_INDEX, width=CAMERA_WIDTH, height=CAMERA_HEIGHT, fps=CAMERA_FPS),\n", - " \"arm\": SharedCamera(\"uvc\", device=ARM_CAMERA_INDEX, width=CAMERA_WIDTH, height=CAMERA_HEIGHT, fps=CAMERA_FPS),\n", - "}\n", - "for name, cam in cameras.items():\n", - " cam.connect()\n", - " print(f\"Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps\")\n", - "\n", - "# Robot without cameras\n", - "robot_cfg = SO101FollowerConfig(\n", - " port=\"/dev/ttyACM1\",\n", - " id=\"my_so101_follower\",\n", - "\n", - ")\n", - "\n", - "robot = SO101Follower(robot_cfg)\n", - "robot.connect()\n", - "print(\"Robot connected\")" - ] - }, - { - "cell_type": "markdown", - "id": "049e73d1", - "metadata": {}, - "source": [ - "## Get Observation & Display" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1630af6", - "metadata": {}, - "outputs": [], - "source": [ - "def get_full_observation() -> dict:\n", - " \"\"\"Combine robot state + SharedCamera images into one dict.\"\"\"\n", - " obs = robot.get_observation()\n", - " for cam_key, cam in cameras.items():\n", - " frame = cam.read_latest()\n", - " obs[cam_key] = frame.data # numpy HWC RGB\n", - " return obs\n", - "\n", - "def viz_observation(obs: dict):\n", - " print(\"Joint positions:\")\n", - " for jn in JOINT_NAMES:\n", - " print(f\" {jn}: {obs[f'{jn}.pos']:.4f}\")\n", - "\n", - " fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n", - " for ax, cam_key in zip(axes, [\"overhead\", \"arm\"]):\n", - " img = np.ascontiguousarray(obs[cam_key])\n", - " if cam_key in FLIP_CAMERAS:\n", - " img = cv2.rotate(img, cv2.ROTATE_180)\n", - " ax.imshow(img)\n", - " ax.set_title(f\"{cam_key} ({img.shape})\")\n", - " ax.axis(\"off\")\n", - "\n", - " plt.tight_layout()\n", - " plt.show()\n", - "\n", - "obs = get_full_observation()\n", - "viz_observation(obs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ef87e90", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "c899db7a", - "metadata": {}, - "source": [ - "## Build Observation for Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eb2ceb63", - "metadata": {}, - "outputs": [], - "source": [ - "from physicalai.data.observation import Observation\n", - "\n", - "def build_observation(obs: dict) -> Observation:\n", - " \"\"\"Convert robot observation to Pi05 Observation.\"\"\"\n", - " # State: (1, 6) float32\n", - " state = torch.tensor(\n", - " [[obs[f\"{jn}.pos\"] for jn in JOINT_NAMES]], dtype=torch.float32, device=DEVICE\n", - " )\n", - "\n", - " # Images: {name: (1, 3, H, W) float32 in [0, 1]}\n", - " def img_to_tensor(img: np.ndarray) -> torch.Tensor:\n", - " t = torch.from_numpy(img.copy()).float() / 255.0 # (H, W, 3) -> [0,1]\n", - " t = t.permute(2, 0, 1).unsqueeze(0) # (1, 3, H, W)\n", - " return t.to(DEVICE)\n", - "\n", - " images = {}\n", - " for cam_key in [\"overhead\", \"arm\"]:\n", - " img = np.ascontiguousarray(obs[cam_key])\n", - " if cam_key in FLIP_CAMERAS:\n", - " img = cv2.rotate(img, cv2.ROTATE_180)\n", - " images[cam_key] = img_to_tensor(img)\n", - "\n", - " return Observation(state=state, images=images, task=TASK)\n", - "\n", - "observation = build_observation(get_full_observation())\n", - "print(f\"State shape: {observation.state.shape}\")\n", - "for k, v in observation.images.items():\n", - " print(f\"Image '{k}': {v.shape}, device={v.device}\")" - ] - }, - { - "cell_type": "markdown", - "id": "3a0557e9", - "metadata": {}, - "source": [ - "## Run Inference (single step, timed)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5167ad02", - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "# Reset action queue to get a fresh prediction\n", - "policy._action_queue = deque()\n", - "\n", - "# Warm up GPU\n", - "if DEVICE == \"cuda\":\n", - " torch.cuda.synchronize()\n", - "\n", - "t0 = time.perf_counter()\n", - "with torch.no_grad():\n", - " action = policy.select_action(observation)\n", - "if DEVICE == \"cuda\":\n", - " torch.cuda.synchronize()\n", - "latency = time.perf_counter() - t0\n", - "\n", - "action_np = action.detach().cpu().numpy()\n", - "if action_np.ndim == 2:\n", - " action_np = action_np[0]\n", - "\n", - "print(f\"Inference latency: {latency*1000:.1f} ms\")\n", - "print(f\"Action shape: {action_np.shape}\")\n", - "print(f\"Action: {action_np}\")\n", - "print()\n", - "for i, jn in enumerate(JOINT_NAMES):\n", - " print(f\" {jn}: {action_np[i]:.4f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "130d146d", - "metadata": {}, - "source": [ - "## Benchmark (5 inferences)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a7dd57d", - "metadata": {}, - "outputs": [], - "source": [ - "latencies = []\n", - "for i in range(5):\n", - " policy._action_queue = deque()\n", - " if DEVICE == \"cuda\":\n", - " torch.cuda.synchronize()\n", - " t0 = time.perf_counter()\n", - " with torch.no_grad():\n", - " action = policy.select_action(observation)\n", - " if DEVICE == \"cuda\":\n", - " torch.cuda.synchronize()\n", - " latencies.append(time.perf_counter() - t0)\n", - "\n", - "print(f\"Latencies: {[f'{l*1000:.1f}ms' for l in latencies]}\")\n", - "print(f\"Mean: {np.mean(latencies)*1000:.1f} ms\")\n", - "print(f\"Min: {np.min(latencies)*1000:.1f} ms\")\n", - "print(f\"Max: {np.max(latencies)*1000:.1f} ms\")" - ] - }, - { - "cell_type": "markdown", - "id": "72d136ad", - "metadata": {}, - "source": [ - "## Send Action to Robot\n", - "\n", - "**Run only when ready to move.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21aeb2ac", - "metadata": {}, - "outputs": [], - "source": [ - "action_dict = {f\"{jn}.pos\": float(action_np[i]) for i, jn in enumerate(JOINT_NAMES)}\n", - "robot.send_action(action_dict)\n", - "print(f\"Sent: {action_dict}\")" - ] - }, - { - "cell_type": "markdown", - "id": "f7084ab5", - "metadata": {}, - "source": [ - "## Rollout Loop (observe → infer → execute full chunk)\n", - "\n", - "Runs a continuous loop: get observation, predict action chunk, execute all actions in the chunk at 30Hz, then repeat. **Interrupt the kernel to stop.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "66acc85f", - "metadata": {}, - "outputs": [], - "source": [ - "# FPS = 30\n", - "# CHUNK_SIZE = 50 # Pi05 default action chunk length\n", - "\n", - "# try:\n", - "# step = 0\n", - "# while True:\n", - "# # Get fresh observation\n", - "# obs = get_full_observation()\n", - "# observation = build_observation(obs)\n", - "\n", - "# # Generate full action chunk\n", - "# policy._action_queue = deque()\n", - "# t0 = time.perf_counter()\n", - "# with torch.no_grad():\n", - "# action = policy.select_action(observation)\n", - "# infer_ms = (time.perf_counter() - t0) * 1000\n", - "\n", - "# # Drain remaining actions from the queue (select_action returns first, rest are queued)\n", - "# actions = [action.detach().cpu().numpy()]\n", - "# while len(policy._action_queue) > 0:\n", - "# a = policy._action_queue.popleft()\n", - "# actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", - "\n", - "# print(f\"[step {step}] Inferred {len(actions)} actions in {infer_ms:.0f}ms\")\n", - "\n", - "# # Execute each action in the chunk at target FPS\n", - "# for i, act in enumerate(actions):\n", - "# t_act = time.perf_counter()\n", - "# act = np.squeeze(act)\n", - "# action_dict = {f\"{jn}.pos\": float(act[j]) for j, jn in enumerate(JOINT_NAMES)}\n", - "# robot.send_action(action_dict)\n", - "\n", - "# # Sleep to maintain target FPS\n", - "# elapsed = time.perf_counter() - t_act\n", - "# sleep_time = (1.0 / FPS) - elapsed\n", - "# if sleep_time > 0:\n", - "# time.sleep(sleep_time)\n", - "\n", - "# step += 1\n", - "\n", - "# except KeyboardInterrupt:\n", - "# print(f\"\\nStopped after {step} chunks\")" - ] - }, - { - "cell_type": "markdown", - "id": "cd3ce7cc", - "metadata": {}, - "source": [ - "## Async Threaded Rollout\n", - "\n", - "Background thread runs GPU inference while the main thread sends actions at 30Hz.\n", - "Lerp-blends overlapping chunks for smooth transitions. **Interrupt the kernel to stop.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d054a13", - "metadata": {}, - "outputs": [], - "source": [ - "import threading\n", - "\n", - "class QueueMixer:\n", - " \"\"\"Action queue with lerp blending between chunks.\"\"\"\n", - "\n", - " def __init__(self, lerp_duration: int = 5):\n", - " self.queue: np.ndarray | None = None\n", - " self.lerp_duration = lerp_duration\n", - " self.index = 0\n", - "\n", - " def add(self, chunk: np.ndarray, offset: int = 0):\n", - " if self.queue is None or self.index >= len(self.queue):\n", - " self.queue = chunk[offset:]\n", - " self.index = 0\n", - " return\n", - "\n", - " remaining = self.queue[self.index:]\n", - " incoming = chunk[offset:]\n", - " n_remain = len(remaining)\n", - " lerp_dur = min(n_remain, self.lerp_duration)\n", - "\n", - " weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0)[:, np.newaxis]\n", - " n_blend = min(n_remain, len(incoming))\n", - " blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend]\n", - " self.queue = np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32)\n", - " self.index = 0\n", - "\n", - " def pop(self) -> np.ndarray | None:\n", - " if self.queue is None or self.index >= len(self.queue):\n", - " return None\n", - " action = self.queue[self.index]\n", - " self.index += 1\n", - " return action\n", - "\n", - " @property\n", - " def remaining(self) -> int:\n", - " if self.queue is None:\n", - " return 0\n", - " return max(len(self.queue) - self.index, 0)\n", - "\n", - "\n", - "class GpuInferenceThread:\n", - " \"\"\"Background thread that runs PyTorch GPU inference on demand.\"\"\"\n", - "\n", - " def __init__(self, policy, device: str):\n", - " self.policy = policy\n", - " self.device = device\n", - " self._lock = threading.Lock()\n", - " self._obs_slot: dict | None = None\n", - " self._obs_ready = threading.Event()\n", - " self._running_inference = False\n", - " self._request_time = 0.0\n", - " self._result_lock = threading.Lock()\n", - " self._result_slot: tuple[np.ndarray, float] | None = None\n", - " self._stop = threading.Event()\n", - " self._thread = threading.Thread(target=self._run, name=\"GpuInferenceThread\", daemon=True)\n", - " self.inference_count = 0\n", - "\n", - " def start(self):\n", - " self._thread.start()\n", - "\n", - " def stop(self):\n", - " self._stop.set()\n", - " self._obs_ready.set()\n", - " self._thread.join(timeout=10.0)\n", - "\n", - " def request(self, obs_dict: dict) -> bool:\n", - " with self._lock:\n", - " if self._obs_slot is not None or self._running_inference:\n", - " return False\n", - " self._obs_slot = obs_dict\n", - " self._request_time = time.perf_counter()\n", - " self._obs_ready.set()\n", - " return True\n", - "\n", - " @property\n", - " def busy(self) -> bool:\n", - " with self._lock:\n", - " return self._obs_slot is not None or self._running_inference\n", - "\n", - " @property\n", - " def alive(self) -> bool:\n", - " return self._thread.is_alive()\n", - "\n", - " @property\n", - " def busy_duration(self) -> float:\n", - " \"\"\"Seconds since last request was submitted. 0 if not busy.\"\"\"\n", - " with self._lock:\n", - " if not (self._obs_slot is not None or self._running_inference):\n", - " return 0.0\n", - " return time.perf_counter() - self._request_time\n", - "\n", - " def force_reset(self):\n", - " \"\"\"Clear stuck state so new requests can be submitted.\"\"\"\n", - " with self._lock:\n", - " self._obs_slot = None\n", - " self._running_inference = False\n", - " print(\"[InferenceThread] Force reset — cleared stuck state\")\n", - "\n", - " def get_result(self) -> tuple[np.ndarray, float] | None:\n", - " with self._result_lock:\n", - " r = self._result_slot\n", - " self._result_slot = None\n", - " return r\n", - "\n", - " def _run(self):\n", - " while not self._stop.is_set():\n", - " self._obs_ready.wait()\n", - " self._obs_ready.clear()\n", - " if self._stop.is_set():\n", - " break\n", - "\n", - " with self._lock:\n", - " obs_dict = self._obs_slot\n", - " self._obs_slot = None\n", - " self._running_inference = True\n", - "\n", - " if obs_dict is None:\n", - " with self._lock:\n", - " self._running_inference = False\n", - " continue\n", - "\n", - " self.inference_count += 1\n", - " t0 = time.perf_counter()\n", - "\n", - " try:\n", - " observation = build_observation(obs_dict)\n", - " self.policy._action_queue = deque()\n", - "\n", - " with torch.no_grad():\n", - " action = self.policy.select_action(observation)\n", - " if self.device == \"cuda\":\n", - " torch.cuda.synchronize()\n", - "\n", - " # Collect full chunk\n", - " actions = [action.detach().cpu().numpy()]\n", - " while len(self.policy._action_queue) > 0:\n", - " a = self.policy._action_queue.popleft()\n", - " actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", - "\n", - " chunk = np.array([np.squeeze(a) for a in actions], dtype=np.float32)\n", - " except Exception as e:\n", - " import traceback\n", - " print(f\"[InferenceThread] Error: {e}\")\n", - " traceback.print_exc()\n", - " with self._lock:\n", - " self._running_inference = False\n", - " continue\n", - "\n", - " latency = time.perf_counter() - t0\n", - " with self._result_lock:\n", - " self._result_slot = (chunk, latency)\n", - " with self._lock:\n", - " self._running_inference = False\n", - "\n", - "print(\"QueueMixer and GpuInferenceThread defined\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "664e5887", - "metadata": {}, - "outputs": [], - "source": [ - "ASYNC_FPS = 30\n", - "QUEUE_THRESHOLD_FRAC = 0.5 # Request new inference when queue drops below this fraction\n", - "LERP_DURATION = 5 # Frames over which to blend old/new chunks\n", - "PLOT_INTERVAL = 1 # Seconds between live plot updates\n", - "\n", - "# --- Warm-up to determine chunk size ---\n", - "warmup_obs = get_full_observation()\n", - "warmup_observation = build_observation(warmup_obs)\n", - "policy._action_queue = deque()\n", - "with torch.no_grad():\n", - " warmup_action = policy.select_action(warmup_observation)\n", - "warmup_actions = [warmup_action.detach().cpu().numpy()]\n", - "while len(policy._action_queue) > 0:\n", - " a = policy._action_queue.popleft()\n", - " warmup_actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a)\n", - "warmup_chunk = np.array([np.squeeze(a) for a in warmup_actions], dtype=np.float32)\n", - "chunk_size = warmup_chunk.shape[0]\n", - "threshold = int(chunk_size * QUEUE_THRESHOLD_FRAC)\n", - "print(f\"Chunk size: {chunk_size}, threshold: {threshold}\")\n", - "\n", - "# --- Telemetry collector ---\n", - "telemetry = {\n", - " \"t\": [], # wall time per step\n", - " \"actions_sent\": [], # action_array for each action sent\n", - " \"queue_depth\": [], # (t, remaining) per step\n", - " \"infer_requests\": [], # t when inference was requested\n", - " \"infer_completions\": [], # (t, latency_ms, offset)\n", - " \"chunks_received\": [], # (t_received, chunk_array, inference_num)\n", - " \"holds\": [], # t when queue was empty (holding position)\n", - "}\n", - "t_start = time.perf_counter()\n", - "\n", - "# --- Live plotting ---\n", - "import matplotlib.patches as mpatches\n", - "from IPython.display import display\n", - "\n", - "def render_telemetry(telemetry, threshold, chunk_size):\n", - " \"\"\"Render telemetry to a matplotlib figure.\"\"\"\n", - " actions_arr = np.array(telemetry[\"actions_sent\"])\n", - " t_arr = np.array(telemetry[\"t\"])\n", - " if len(t_arr) < 2:\n", - " return None\n", - "\n", - " queue_t, queue_d = zip(*telemetry[\"queue_depth\"])\n", - " n_joints = len(JOINT_NAMES)\n", - " n_chunks = len(telemetry[\"chunks_received\"])\n", - " chunk_cmap = plt.cm.Set2\n", - "\n", - " fig, axes = plt.subplots(n_joints + 1, 1, figsize=(16, 2.5 * (n_joints + 1)), sharex=True)\n", - "\n", - " for j, jn in enumerate(JOINT_NAMES):\n", - " ax = axes[j]\n", - " ax.plot(t_arr, actions_arr[:, j], color=\"black\", linewidth=1.2, zorder=3)\n", - "\n", - " for idx, (t_recv, chunk, _) in enumerate(telemetry[\"chunks_received\"]):\n", - " color = chunk_cmap(idx % 8)\n", - " chunk_t = t_recv + np.arange(len(chunk)) / ASYNC_FPS\n", - " ax.plot(chunk_t, chunk[:, j], color=color, alpha=0.4, linewidth=1.5, zorder=2)\n", - "\n", - " for t_req in telemetry[\"infer_requests\"]:\n", - " ax.axvline(t_req, color=\"green\", alpha=0.3, linewidth=0.8, linestyle=\"--\")\n", - " for t_comp, _, _ in telemetry[\"infer_completions\"]:\n", - " ax.axvline(t_comp, color=\"blue\", alpha=0.3, linewidth=0.8, linestyle=\"-.\")\n", - " for ht in telemetry[\"holds\"]:\n", - " ax.axvspan(ht, ht + 1/ASYNC_FPS, color=\"red\", alpha=0.15)\n", - "\n", - " ax.set_ylabel(jn, fontsize=9)\n", - " ax.tick_params(labelsize=8)\n", - "\n", - " ax_q = axes[-1]\n", - " ax_q.fill_between(queue_t, queue_d, alpha=0.3, color=\"steelblue\")\n", - " ax_q.plot(queue_t, queue_d, color=\"steelblue\", linewidth=1)\n", - " ax_q.axhline(threshold, color=\"orange\", linestyle=\"--\", linewidth=1, label=f\"threshold ({threshold})\")\n", - " for t_req in telemetry[\"infer_requests\"]:\n", - " ax_q.axvline(t_req, color=\"green\", alpha=0.5, linewidth=1, linestyle=\"--\")\n", - " for t_comp, _, _ in telemetry[\"infer_completions\"]:\n", - " ax_q.axvline(t_comp, color=\"blue\", alpha=0.5, linewidth=1, linestyle=\"-.\")\n", - " ax_q.set_ylabel(\"Queue\", fontsize=9)\n", - " ax_q.set_xlabel(\"Time (s)\", fontsize=10)\n", - " ax_q.legend(loc=\"upper right\", fontsize=8)\n", - " ax_q.tick_params(labelsize=8)\n", - "\n", - " legend_handles = [\n", - " mpatches.Patch(color=\"black\", label=\"Sent to robot\"),\n", - " mpatches.Patch(color=\"green\", alpha=0.5, label=\"Infer requested\"),\n", - " mpatches.Patch(color=\"blue\", alpha=0.5, label=\"Chunk received\"),\n", - " mpatches.Patch(color=\"red\", alpha=0.3, label=\"Hold\"),\n", - " ]\n", - " for idx in range(min(n_chunks, 6)):\n", - " legend_handles.append(mpatches.Patch(color=chunk_cmap(idx % 8), alpha=0.5, label=f\"Chunk #{idx+1}\"))\n", - " fig.legend(handles=legend_handles, loc=\"upper center\", ncol=min(len(legend_handles), 5), fontsize=8, bbox_to_anchor=(0.5, 1.02))\n", - "\n", - " n_holds = len(telemetry[\"holds\"])\n", - " fig.suptitle(f\"Live — {len(t_arr)} steps, {n_chunks} inferences, {n_holds} holds, t={t_arr[-1]:.1f}s\", fontsize=11, y=1.04)\n", - " plt.tight_layout()\n", - " return fig\n", - "\n", - "# Show initial empty plot placeholder\n", - "plot_handle = display(plt.figure(figsize=(16, 2)), display_id=True)\n", - "plt.close()\n", - "\n", - "# --- Init ---\n", - "queue = QueueMixer(lerp_duration=LERP_DURATION)\n", - "queue.add(warmup_chunk) # Seed so robot can move immediately\n", - "\n", - "infer_thread = GpuInferenceThread(policy, DEVICE)\n", - "infer_thread.start()\n", - "\n", - "goal_time = 1.0 / ASYNC_FPS\n", - "last_action = warmup_chunk[0]\n", - "step = 0\n", - "hold_count = 0\n", - "last_plot_time = 0.0\n", - "\n", - "print(f\"Async rollout at {ASYNC_FPS} Hz — interrupt kernel to stop\")\n", - "\n", - "try:\n", - " while True:\n", - " loop_start = time.perf_counter()\n", - " t_rel = loop_start - t_start\n", - "\n", - " # 1. Check for inference result\n", - " result = infer_thread.get_result()\n", - " if result is not None:\n", - " chunk, latency = result\n", - " offset = int(latency * ASYNC_FPS)\n", - " queue.add(chunk, offset=offset)\n", - " queue.lerp_duration = max(offset, 1)\n", - " telemetry[\"infer_completions\"].append((t_rel, latency * 1000, offset))\n", - " telemetry[\"chunks_received\"].append((t_rel, chunk.copy(), infer_thread.inference_count))\n", - " print(f\"[t={t_rel:.1f}s] <<< inference #{infer_thread.inference_count}: \"\n", - " f\"{latency*1000:.0f}ms, offset={offset}, queue={queue.remaining}\")\n", - "\n", - " # 2. Request new inference when queue is low (with stuck/dead detection)\n", - " INFER_TIMEOUT = 15.0\n", - " if queue.remaining <= threshold:\n", - " if not infer_thread.alive:\n", - " print(f\"[t={t_rel:.1f}s] !!! Inference thread DEAD — restarting\")\n", - " infer_thread = GpuInferenceThread(policy, DEVICE)\n", - " infer_thread.start()\n", - " elif infer_thread.busy_duration > INFER_TIMEOUT:\n", - " print(f\"[t={t_rel:.1f}s] !!! Inference stuck for {infer_thread.busy_duration:.0f}s — force resetting\")\n", - " infer_thread.force_reset()\n", - "\n", - " if not infer_thread.busy:\n", - " obs = get_full_observation()\n", - " submitted = infer_thread.request(obs)\n", - " if submitted:\n", - " telemetry[\"infer_requests\"].append(t_rel)\n", - " print(f\"[t={t_rel:.1f}s] >>> inference requested (queue={queue.remaining}/{chunk_size})\")\n", - "\n", - " # 3. Pop action\n", - " action = queue.pop()\n", - " if action is not None:\n", - " last_action = action\n", - " hold_count = 0\n", - " else:\n", - " action = last_action\n", - " hold_count += 1\n", - " telemetry[\"holds\"].append(t_rel)\n", - " if hold_count == 1 or hold_count % 30 == 0:\n", - " print(f\"[t={t_rel:.1f}s] !!! Queue empty, holding (hold={hold_count})\")\n", - "\n", - " # 4. Send to robot\n", - " if action is not None:\n", - " action_dict = {f\"{jn}.pos\": float(action[i]) for i, jn in enumerate(JOINT_NAMES)}\n", - " robot.send_action(action_dict)\n", - "\n", - " # 5. Record telemetry\n", - " telemetry[\"t\"].append(t_rel)\n", - " telemetry[\"actions_sent\"].append(action.copy() if action is not None else last_action.copy())\n", - " telemetry[\"queue_depth\"].append((t_rel, queue.remaining))\n", - "\n", - " # 6. Live plot update (throttled)\n", - " if t_rel - last_plot_time > PLOT_INTERVAL and len(telemetry[\"t\"]) > 10:\n", - " fig = render_telemetry(telemetry, threshold, chunk_size)\n", - " if fig is not None:\n", - " plot_handle.update(fig)\n", - " plt.close(fig)\n", - " last_plot_time = t_rel\n", - "\n", - " # 7. Maintain FPS\n", - " elapsed = time.perf_counter() - loop_start\n", - " sleep_time = goal_time - elapsed\n", - " if sleep_time > 0:\n", - " time.sleep(sleep_time)\n", - "\n", - " step += 1\n", - "\n", - "except KeyboardInterrupt:\n", - " print(f\"\\nStopped after {step} steps, {infer_thread.inference_count} inferences, \"\n", - " f\"{len(telemetry['holds'])} holds, duration={time.perf_counter() - t_start:.1f}s\")\n", - "\n", - "infer_thread.stop()\n", - "print(\"Inference thread stopped\")\n", - "\n", - "# Final plot\n", - "fig = render_telemetry(telemetry, threshold, chunk_size)\n", - "if fig is not None:\n", - " plot_handle.update(fig)\n", - " plt.close(fig)\n", - "\n", - "if telemetry[\"infer_completions\"]:\n", - " latencies_ms = [lat for _, lat, _ in telemetry[\"infer_completions\"]]\n", - " t_arr = np.array(telemetry[\"t\"])\n", - " print(f\"\\nInference latencies: mean={np.mean(latencies_ms):.0f}ms, \"\n", - " f\"min={np.min(latencies_ms):.0f}ms, max={np.max(latencies_ms):.0f}ms\")\n", - " print(f\"Queue empty fraction: {len(telemetry['holds'])/max(len(t_arr),1)*100:.1f}%\")\n", - " print(f\"Total duration: {t_arr[-1]:.1f}s, steps: {len(t_arr)}, effective Hz: {len(t_arr)/t_arr[-1]:.1f}\")" - ] - }, - { - "cell_type": "markdown", - "id": "b0e2a88d", - "metadata": {}, - "source": [ - "## Disconnect" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6eb8f97", - "metadata": {}, - "outputs": [], - "source": [ - "for name, cam in cameras.items():\n", - " cam.disconnect()\n", - "\n", - " print(f\"Camera '{name}' disconnected\")\n", - " \n", - "robot.disconnect()\n", - "print(\"Robot disconnected\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "physical-ai-studio (3.12.3)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/physicalai/robot/__init__.py b/src/physicalai/robot/__init__.py index 1465750..110f90a 100644 --- a/src/physicalai/robot/__init__.py +++ b/src/physicalai/robot/__init__.py @@ -14,13 +14,14 @@ from __future__ import annotations from physicalai.robot.connect import connect -from physicalai.robot.interface import Robot +from physicalai.robot.interface import Robot, RobotObservation from physicalai.robot.verify import verify_robot __all__ = [ "Robot", "connect", "verify_robot", + "RobotObservation", ] diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index bb92225..b0b7e2b 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -110,6 +110,7 @@ def __init__( # noqa: D107 self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=_DEFAULT_LERP_FRAMES)) self._obs_to_input = obs_to_input or default_observation_to_input self._callbacks = list(callbacks) + self._goal_time = (1.0 / fps) * 3 def run(self, *, duration_s: float | None = None) -> RunStats: """Run the control loop. @@ -166,7 +167,7 @@ def run(self, *, duration_s: float | None = None) -> RunStats: if modified is not None: action = modified - self._robot.send_action(action) + self._robot.send_action(action, goal_time=self._goal_time) self._invoke_callback("on_action_sent", action=action, step=step) elapsed = time.perf_counter() - loop_start From 07790a77f0be950325be8139bc417ba0b753aca2 Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Sun, 17 May 2026 22:00:52 +0200 Subject: [PATCH 04/19] remove plan --- .../runtime-system/implementation_plan.md | 1146 ----------------- 1 file changed, 1146 deletions(-) delete mode 100644 docs/design/components/runtime-system/implementation_plan.md diff --git a/docs/design/components/runtime-system/implementation_plan.md b/docs/design/components/runtime-system/implementation_plan.md deleted file mode 100644 index 07b1473..0000000 --- a/docs/design/components/runtime-system/implementation_plan.md +++ /dev/null @@ -1,1146 +0,0 @@ -# Runtime System — Implementation Plan - -This document is the implementation plan for `physicalai.runtime`. It refines the original [policy_runtime_design.md](./policy_runtime_design.md) based on codebase exploration, bug analysis, and architecture review. - -Read [policy_runtime_design.md](./policy_runtime_design.md) first for API shape and ownership rules. This document covers what to build, in what order, and why. - -## Reference Implementation - -The golden reference is `physicalai/examples/runtime/inference_async.py` — a working async prototype with QueueMixer, InferenceThread, velocity clamping, and camera discovery. Every runtime component must match or exceed its behavior. - ---- - -## Phase 1: Critical Bug Fixes (half day) ✅ - -Fix bugs on the code path Phase 2 depends on. Phase 2 defines a public runtime contract — workarounds would calcify into permanent API shape. `predict_action_chunk()` currently raises `RuntimeError` without these fixes because Bug 2's inverted guard blocks the runtime's call to `model.predict_action_chunk(obs)`. This is not stylistic — it is a hard blocker. - -### Bug 1: `use_action_queue` checks manifest, ignores runtime runner - -**File**: `physicalai/src/physicalai/inference/model.py` — `use_action_queue` property - -**Problem**: Reads `self.manifest.model.runner` class_path. Ignores `self.runner` passed at construction or set at runtime. - -**Fix**: Check `isinstance(self.runner, ActionChunking)` instead: - -```python -@property -def use_action_queue(self) -> bool: - from physicalai.inference.runners.action_chunking import ActionChunking - return isinstance(self.runner, ActionChunking) -``` - -### Bug 2: `select_action()` / `predict_action_chunk()` guards inverted - -**File**: `physicalai/src/physicalai/inference/model.py` - -**Problem**: `select_action()` raises when `not use_action_queue`. `predict_action_chunk()` raises when `use_action_queue`. Both are backwards — `select_action()` should be the generic one-action API, `predict_action_chunk()` should return raw chunks. - -**Fix**: Remove guards entirely. Both methods should work for any runner (shape-stable contract per design doc §3): - -| Runner | `select_action()` | `predict_action_chunk()` | -| --------------- | ------------------ | ------------------------ | -| single-pass | runner output | wrap as `(1, D)` chunk | -| chunk-producing | pop one via cursor | runner output | - -### Bug 3: ACT export manifest declares SinglePass instead of ActionChunking - -**File**: `library/src/physicalai/export/mixin_policy.py` — `_build_manifest()` - -**Problem**: Checks `metadata.get("use_action_queue", False)` but ACT never sets this metadata flag. Manifest always gets `SinglePass` runner. - -**Fix**: ACT export must pass `use_action_queue=True` and `chunk_size=` in its metadata kwargs. Same fix needed for Pi0.5 (Bug 5). - -### Deferred Bugs (document as issues, do not block Phase 2) - -| Bug | Summary | Why deferred | -| ----- | --------------------------------------------------------------------- | ------------------------------------------------------- | -| Bug 4 | Manifest missing `hardware` section (`RobotSpec`, `CameraSpec`) | Not on inference code path | -| Bug 5 | Pi0.5 export also declares SinglePass | Same root cause as Bug 3, fix together | -| Bug 7 | Pi0.5 normalization not baked into graph (external pre/postprocessor) | By design — manifest `preprocessors_specs` handles this | -| Bug 8 | Pi0.5 denoising loop not exportable cleanly (11x graph size) | Export concern, not runtime concern | -| Bug 9 | `OVTokenizer` may need `import openvino_tokenizers` for custom ops | Needs verification — may already work via adapter | - ---- - -## Phase 2: Runtime System (2–3 days) ✅ - -New package: `physicalai/src/physicalai/runtime/` - -```text -physicalai/src/physicalai/runtime/ -├── __init__.py # exports: PolicyRuntime, SyncExecution, AsyncExecution, -│ # ActionQueue, LerpSmoother, ReplaceSmoother, RunStats -├── smoothers.py # ChunkSmoother ABC, ReplaceSmoother, LerpSmoother -├── _action_queue.py # ActionQueue (public via __init__, internal module) -├── execution.py # Execution ABC, SyncExecution, AsyncExecution, WorkerDiedError -└── runtime.py # PolicyRuntime, RunStats, default_observation_to_input -``` - -Dependency order: `smoothers.py` → `_action_queue.py` → `execution.py` → `runtime.py` → `__init__.py` - -### Architectural Decisions - -**ActionQueue is owned by PolicyRuntime, not hidden inside Execution.** - -The original design doc keeps Execution (scheduling) and ActionQueue (buffering) as separate concerns. This is correct — when `AsyncExecution(transport="process")` or `RemoteExecution` arrive, they should push chunks into the same ActionQueue without duplicating buffer logic. - -Users get a clean default API: - -```python -runtime = PolicyRuntime( - robot=robot, - model=model, - execution=AsyncExecution(threshold=0.5), - fps=30, -) -runtime.run(duration_s=60) -``` - -Power users can override buffering: - -```python -runtime = PolicyRuntime( - robot=robot, - model=model, - execution=AsyncExecution(threshold=0.5), - action_queue=ActionQueue(smoother=LerpSmoother(duration_frames=10)), - fps=30, -) -``` - -**Execution is a scheduler, not a buffer.** It decides when/where inference runs and pushes results into ActionQueue. It does not own pop, remaining, or chunk_size. - -**InferenceModel must NOT import ActionQueue.** Per design doc §4: if both layers need pop-from-chunk mechanics, they share `ActionChunkCursor`, not `ActionQueue`. - -### 2.1 `smoothers.py` - -Extracted from `QueueMixer.add()` in inference_async.py. - -```python -class ChunkSmoother(ABC): - """Merges a new action chunk into remaining actions from the previous chunk.""" - - @abstractmethod - def merge( - self, - remaining: np.ndarray, # (R, action_dim) — unconsumed actions from previous chunk - incoming: np.ndarray, # (H, action_dim) — new chunk from inference - offset: int, # skip first N actions of incoming (latency compensation) - ) -> np.ndarray: - """Return merged actions array. Called by ActionQueue.push_chunk().""" - ... - - -class ReplaceSmoother(ChunkSmoother): - """Drop remaining actions, use incoming[offset:].""" - - def merge(self, remaining, incoming, offset): - return incoming[offset:] - - -class LerpSmoother(ChunkSmoother): - """Lerp-blend overlapping region, then append non-overlapping tail. - - Stateless merge — no hidden mutation. duration_frames is the fallback - blending window used when offset is 0. When offset > 0, the blending - window is computed from offset directly: lerp_dur = max(offset, 1). - - Matches QueueMixer.add() from inference_async.py: - - Weights: w_i = max(1.0 - i / lerp_dur, 0.0) for old actions - - Overlap region: blended = w * remaining + (1 - w) * incoming - """ - - def __init__(self, duration_frames: int = 5) -> None: - self.duration_frames = duration_frames - - def merge(self, remaining, incoming, offset): - lerp_dur = max(offset, 1) if offset > 0 else self.duration_frames - - incoming = incoming[offset:] - n_remain = len(remaining) - lerp_dur = min(n_remain, lerp_dur) - - weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0) - weights = weights[:, np.newaxis] - - n_blend = min(n_remain, len(incoming)) - blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend] - - return np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32) -``` - -Key: `offset` is not just "skip N actions." It is latency compensation — `offset = int(inference_latency * fps)`. The smoother must handle this, not the caller. - -### 2.2 `_action_queue.py` (public API, internal module) - -Thread-safe action buffer with smoother integration and hold telemetry. - -```python -class ActionQueue: - """Thread-safe action buffer with chunk smoothing and starvation telemetry. - - Public API — exported from physicalai.runtime. Power users can override - the default ActionQueue on PolicyRuntime to customize smoothing behavior. - Execution pushes chunks into it; PolicyRuntime pops actions from it. - """ - - def __init__(self, smoother: ChunkSmoother | None = None) -> None: - self._smoother = smoother or ReplaceSmoother() - self._deque: deque[np.ndarray] = deque() - self._lock = threading.Lock() - self._consecutive_holds: int = 0 - self._total_holds: int = 0 - self._total_pops: int = 0 - - def push_chunk(self, chunk: np.ndarray, offset: int = 0) -> None: - """Merge a new chunk into the queue. Thread-safe.""" - with self._lock: - remaining = (np.stack(list(self._deque)) - if self._deque - else np.empty((0, chunk.shape[1]), dtype=chunk.dtype)) - merged = self._smoother.merge(remaining, chunk, offset) - self._deque.clear() - self._deque.extend(merged) - - def pop(self) -> np.ndarray | None: - """Pop next action, or None if empty. Thread-safe.""" - with self._lock: - if not self._deque: - self._consecutive_holds += 1 - self._total_holds += 1 - return None - self._consecutive_holds = 0 - self._total_pops += 1 - return self._deque.popleft() - - @property - def remaining(self) -> int: - with self._lock: - return len(self._deque) - - def below_threshold(self, threshold: int) -> bool: - return self.remaining < threshold - - def clear(self) -> None: - with self._lock: - self._queue = None - self._index = 0 - - # --- Telemetry --- - @property - def consecutive_holds(self) -> int: - return self._consecutive_holds - - @property - def total_holds(self) -> int: - return self._total_holds - - @property - def total_pops(self) -> int: - return self._total_pops -``` - -### 2.3 `execution.py` - -**Execution ABC** — scheduler only. Pushes chunks into ActionQueue, does not own pop/remaining. - -```python -class Execution(ABC): - """Decides when and where inference runs. Pushes results into ActionQueue.""" - - @abstractmethod - def start(self, model: InferenceModel, action_queue: ActionQueue) -> None: - """Bind to model and queue. Called once before the loop.""" - ... - - @abstractmethod - def maybe_request(self, observation: dict[str, np.ndarray]) -> None: - """Check if new inference is needed. If so, run or schedule it.""" - ... - - @abstractmethod - def warmup(self, sample_observation: dict[str, np.ndarray]) -> None: - """Run one inference to discover chunk_size and seed the queue. - - After warmup(): - - action_queue has one chunk ready (robot starts moving immediately) - - self.chunk_size is set - - self.action_dim is set - """ - ... - - @abstractmethod - def stop(self) -> None: - """Stop scheduling. For async: signal thread, join with timeout.""" - ... - - @property - @abstractmethod - def chunk_size(self) -> int: - """Discovered after warmup(). Used to compute threshold.""" - ... -``` - -**SyncExecution** — blocks on inference when queue runs low. - -```python -class SyncExecution(Execution): - """Synchronous inference in the control thread.""" - - def __init__(self) -> None: - self._model: InferenceModel | None = None - self._queue: ActionQueue | None = None - self._chunk_size: int = 0 - - def start(self, model, action_queue): - self._model = model - self._queue = action_queue - - def warmup(self, sample_observation): - actions = self._model.predict_action_chunk(sample_observation) - self._chunk_size = actions.shape[0] - self._queue.push_chunk(actions, offset=0) - - def maybe_request(self, observation): - if self._queue.below_threshold(1): # refill when empty - actions = self._model.predict_action_chunk(observation) - self._queue.push_chunk(actions, offset=0) - - def stop(self): - pass - - @property - def chunk_size(self): - return self._chunk_size -``` - -**AsyncExecution** — background thread with health monitoring. Maps to `InferenceThread` from inference_async.py. - -```python -class WorkerDiedError(RuntimeError): - """Raised when the inference worker thread dies unexpectedly.""" - pass - - -class AsyncExecution(Execution): - """Async inference in a background thread. - - Thread architecture (matches inference_async.py): - - Control thread (main): Inference thread (background): - ───────────────────── ──────────────────────────── - loop at fps: loop: - obs = robot.get_observation() wait for obs_slot - execution.maybe_request(obs) chunk = model.predict_action_chunk(obs) - action = queue.pop() offset = int(latency * fps) - robot.send_action(action) queue.push_chunk(chunk, offset) - """ - - def __init__( - self, - threshold: float = 0.5, - fps: int = 30, - watchdog_timeout_s: float = 30.0, - max_consecutive_holds: int | None = None, # default: 3 * fps (3 seconds) - ) -> None: - self._threshold_frac = threshold - self._fps = fps - self._watchdog_timeout_s = watchdog_timeout_s - self._max_consecutive_holds = max_consecutive_holds or 3 * fps - - # Set during start() - self._model: InferenceModel | None = None - self._queue: ActionQueue | None = None - self._chunk_size: int = 0 - self._threshold_count: int = 0 - - # Thread state - self._lock = threading.Lock() - self._obs_slot: dict | None = None - self._obs_ready = threading.Event() - self._running_inference = False - self._request_time: float = 0.0 - self._stop_event = threading.Event() - self._thread: threading.Thread | None = None - self._death_cause: BaseException | None = None - self._inference_count: int = 0 - - def start(self, model, action_queue): - self._model = model - self._queue = action_queue - self._thread = threading.Thread(target=self._run, name="InferenceThread", daemon=True) - self._thread.start() - - def warmup(self, sample_observation): - actions = self._model.predict_action_chunk(sample_observation) - self._chunk_size = actions.shape[0] - self._threshold_count = int(self._chunk_size * self._threshold_frac) - self._queue.push_chunk(actions, offset=0) - - def maybe_request(self, observation): - # Check for worker death — raise, don't silently continue - if self._thread is not None and not self._thread.is_alive(): - if self._death_cause is not None: - raise WorkerDiedError( - f"Inference thread died: {self._death_cause}" - ) from self._death_cause - - # Check for stuck inference - if self._busy_duration > self._watchdog_timeout_s: - logger.warning( - "Inference stuck for %.0fs — force resetting", self._busy_duration, - ) - self._force_reset() - - # Submit if queue is low and worker is idle - if self._queue.below_threshold(self._threshold_count): - if not self._busy: - # Defensive copy — observation may be reused by caller - snapshot = { - k: v.copy() if isinstance(v, np.ndarray) else v - for k, v in observation.items() - } - with self._lock: - self._obs_slot = snapshot - self._request_time = time.perf_counter() - self._obs_ready.set() - - def stop(self): - if self._thread is not None: - self._stop_event.set() - self._obs_ready.set() # unblock wait - self._thread.join(timeout=10.0) - - @property - def chunk_size(self): - return self._chunk_size - - # --- Health properties --- - - @property - def alive(self) -> bool: - return self._thread is not None and self._thread.is_alive() - - @property - def _busy(self) -> bool: - with self._lock: - return self._obs_slot is not None or self._running_inference - - @property - def _busy_duration(self) -> float: - with self._lock: - if not (self._obs_slot is not None or self._running_inference): - return 0.0 - return time.perf_counter() - self._request_time - - @property - def inference_count(self) -> int: - return self._inference_count - - # --- Internal --- - - def _force_reset(self) -> None: - with self._lock: - self._obs_slot = None - self._running_inference = False - logger.warning("Force reset — cleared stuck inference state") - - def _run(self) -> None: - """Inference thread main loop.""" - try: - while not self._stop_event.is_set(): - self._obs_ready.wait() - self._obs_ready.clear() - - if self._stop_event.is_set(): - return - - with self._lock: - obs = self._obs_slot - self._obs_slot = None - if obs is None: - continue - self._running_inference = True - - t0 = time.perf_counter() - actions = self._model.predict_action_chunk(obs) - latency = time.perf_counter() - t0 - - offset = int(latency * self._fps) - - self._queue.push_chunk(actions, offset=offset) - self._inference_count += 1 - - with self._lock: - self._running_inference = False - - except Exception as e: - self._death_cause = e - logger.error("Inference thread died: %s", e, exc_info=True) -``` - -### 2.4 `runtime.py` - -```python -from __future__ import annotations - -import logging -import time -from collections.abc import Callable, Mapping, Sequence -from typing import Any, Protocol - -import numpy as np - -from physicalai.capture.camera import Camera, Frame -from physicalai.inference import InferenceModel -from physicalai.robot.interface import Robot, RobotObservation -from physicalai.runtime._action_queue import ActionQueue -from physicalai.runtime.execution import Execution, WorkerDiedError -from physicalai.runtime.smoothers import LerpSmoother - -logger = logging.getLogger(__name__) - -def default_observation_to_input( - robot_obs: RobotObservation, - camera_frames: dict[str, Frame], -) -> dict[str, Any]: - """Default observation-to-model-input conversion. - - Maps: - - Joint positions → "state" array - - Camera frames → "images.{name}" arrays - - For Pi0.5 or other models needing custom keys (e.g. "task"), - pass a custom obs_to_input callable to PolicyRuntime. - """ - model_input: dict[str, Any] = {} - - # Collect joint positions into "state" vector - if robot_obs.joint_positions: - model_input["state"] = np.array([robot_obs.joint_positions], dtype=np.float32) - - # Map camera frames to "images.{name}" - for name, frame in camera_frames.items(): - model_input[f"images.{name}"] = frame.data - - return model_input - - -class RuntimeCallback(Protocol): - """Optional hook points in the PolicyRuntime control loop.""" - - def before_send_action(self, *, action: np.ndarray, step: int) -> np.ndarray | None: - """Called before sending action. Return modified action or None to use original.""" - ... - - def on_action_sent(self, *, action: np.ndarray, step: int) -> None: - """Called after action is sent to robot.""" - ... - - def on_hold(self, *, step: int, holds: int) -> None: - """Called when action queue is empty and robot holds last position.""" - ... - - -class PolicyRuntime: - """Runs a policy on robot hardware. - - Loop shape (matches inference_async.py): - obs = robot.get_observation() - model_input = obs_to_input(obs, cameras) - execution.maybe_request(model_input) - action = action_queue.pop() - if action is None: hold position - robot.send_action(action) - sleep_until_next_tick() - """ - - def __init__( - self, - robot: Robot, - model: InferenceModel, - execution: Execution, - fps: float, - cameras: Mapping[str, Camera] | None = None, - action_queue: ActionQueue | None = None, - obs_to_input: Callable[[RobotObservation, dict[str, Frame]], dict[str, Any]] | None = None, - callbacks: Sequence[RuntimeCallback] = (), - ) -> None: - self._robot = robot - self._model = model - self._execution = execution - self._fps = fps - self._cameras = cameras or {} - self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=5)) - self._obs_to_input = obs_to_input or default_observation_to_input - self._callbacks = list(callbacks) - - def run(self, *, duration_s: float | None = None) -> RunStats: - """Run the control loop. - - 1. Warm up — run one inference, seed queue, discover chunk_size - 2. Loop — observe, maybe_request, pop, send, sleep - 3. Shutdown — stop execution, drain - """ - # --- Init --- - self._execution.start(self._model, self._action_queue) - - sample_obs = self._build_model_input() - self._execution.warmup(sample_obs) - - goal_time = 1.0 / self._fps - step = 0 - last_action: np.ndarray | None = None - - try: - while True: - if duration_s is not None and step * goal_time >= duration_s: - break - - loop_start = time.perf_counter() - - # 1. Observe - obs = self._build_model_input() - - # 2. Maybe request inference - self._execution.maybe_request(obs) - - # 3. Pop action - action = self._action_queue.pop() - if action is not None: - last_action = action - else: - action = last_action - holds = self._action_queue.consecutive_holds - if holds == 1: - logger.warning("Queue empty — holding position") - elif holds % self._fps == 0: - logger.warning( - "Queue starvation: %d consecutive holds (%.1fs)", - holds, holds / self._fps, - ) - self._invoke_callback("on_hold", step=step, holds=holds) - - if action is None: - # No warmup result and no previous action — skip - logger.error("No action available (warmup may have failed)") - continue - - # 4. Callbacks - action = self._invoke_callback("before_send_action", action=action, step=step) or action - - # 5. Send - self._robot.send_action(action) - self._invoke_callback("on_action_sent", action=action, step=step) - - # 6. Timing - elapsed = time.perf_counter() - loop_start - sleep_time = goal_time - elapsed - if sleep_time > 0: - time.sleep(sleep_time) - - step += 1 - - except KeyboardInterrupt: - logger.info("Interrupted by user") - except WorkerDiedError as e: - logger.error("Worker died during runtime: %s", e) - raise - finally: - self._shutdown(step) - - return RunStats( - steps=step, - total_pops=self._action_queue.total_pops, - total_holds=self._action_queue.total_holds, - inference_count=getattr(self._execution, "inference_count", 0), - ) - - def _build_model_input(self) -> dict[str, Any]: - robot_obs = self._robot.get_observation() - camera_frames = {name: cam.read_latest() for name, cam in self._cameras.items()} - return self._obs_to_input(robot_obs, camera_frames) - - def _shutdown(self, step: int) -> None: - """Robot and cameras must be connected before run(). Caller owns lifecycle.""" - # 1. Stop inference scheduling - self._execution.stop() - - # 2. Drain remaining actions (up to 1s) for smooth stop - remaining = self._action_queue.remaining - drain_limit = min(remaining, int(self._fps)) - for _ in range(drain_limit): - action = self._action_queue.pop() - if action is not None: - self._robot.send_action(action) - time.sleep(1.0 / self._fps) - - logger.info( - "Shutdown complete — %d steps, %d pops, %d holds", - step, self._action_queue.total_pops, self._action_queue.total_holds, - ) - - def _invoke_callback(self, method: str, **kwargs): - result = None - for cb in self._callbacks: - fn = getattr(cb, method, None) - if fn is not None: - result = fn(**kwargs) - return result -``` - -### 2.5 Tests - -```text -physicalai/tests/unit/runtime/ -├── test_smoothers.py # ReplaceSmoother, LerpSmoother: offset handling, lerp weights, -│ # dynamic duration, edge cases (empty remaining, offset > chunk) -├── test_action_queue.py # push/pop, smoother integration, thread safety (concurrent -│ # push+pop from 2 threads), hold counters, clear() -├── test_execution.py # SyncExecution: warmup seeds queue, maybe_request refills -│ # AsyncExecution: mock model, health monitoring (alive/busy/ -│ # busy_duration), WorkerDiedError propagation, force_reset -└── test_runtime.py # PolicyRuntime with mock robot + mock model: - # full loop, hold fallback, shutdown drain, callbacks, - # WorkerDiedError propagation, duration_s limit -``` - -All tests use mock `InferenceModel` and mock `Robot` — no hardware, no exported models. - ---- - -## Phase 3: CLI and Integration (1–2 days) - -1. `physicalai run --config so101_pi05.yaml` CLI command -2. YAML config loader (`PolicyRuntime.from_config()`) -3. Observation builder (bridges `Robot` protocol + `Camera` → model input dict) -4. Migrate `inference_async.py` to use `PolicyRuntime` (becomes ~20 lines) - -### Velocity clamping and camera discovery stay outside core - -Velocity clamping (`max_speed`, `ramp_steps`, `commanded_pos` tracking) is SO-101-specific. Camera discovery (interactive selection, name mapping, blank cameras, flip) is app-specific. - -These belong in: - -- Example scripts (`examples/runtime/`) -- CLI helpers (`physicalai.cli.run`) -- User callbacks - -Not in `PolicyRuntime` or `Execution`. If a reusable pattern emerges across 2+ robots, promote to a formal action-transform layer later. - ---- - -## Phase 3.5: Runtime Telemetry (1–2 days) - -Streaming observability for the runtime control loop. The runtime process must not be bottlenecked by telemetry — all emission is fire-and-forget over zenoh pub-sub. A separate observer process handles visualization, aggregation, and persistence. - -### Constraint - -Two-week release deadline. Scope: emitter in runtime, observer CLI, zenoh-only transport. No OpenTelemetry, no Studio UI integration, no remote telemetry. Use OTel-compatible metric naming so a future OTLP bridge is trivial. - -### Architecture - -```text -┌──────────────────────────────────────────┐ -│ Runtime Process (real-time budget) │ -│ │ -│ PolicyRuntime.run() │ -│ └─ TelemetryEmitter │ -│ zenoh.put() per tick/inference │ -│ no-op if zenoh not installed │ -└──────────┬───────────────────────────────┘ - │ zenoh pub-sub (local SHM) - ▼ -┌──────────────────────────────────────────┐ -│ Observer Process (separate, optional) │ -│ │ -│ TelemetrySubscriber │ -│ ├─ Live console dashboard │ -│ ├─ JSONL file sink (--record) │ -│ └─ Future: WebSocket → Studio UI │ -└──────────────────────────────────────────┘ -``` - -### Topic Schema - -All topics prefixed with `physicalai/rt/{session_id}/`. Session ID is a short hex string generated at `run()` start. - -| Topic | Frequency | Payload (msgpack) | Purpose | -| ----------- | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | -| `tick` | Every control loop tick (30 Hz) | `step`, `timestamp`, `joint_positions: (D,)`, `action_sent: (D,)`, `queue_remaining: int`, `loop_duration_s`, `sleep_time_s` | Core loop health + executed trajectory | -| `inference` | Per inference completion (~2–3 Hz) | `latency_s`, `offset`, `chunk: (H, D)` | Latency monitoring + predicted trajectory | -| `lifecycle` | On start / warmup / shutdown / error | `event: str`, `metadata: dict` | Session boundaries | - -**Serialization**: msgpack with numpy arrays encoded as `{"__np__": true, "dtype": str, "shape": list, "data": bytes}`. Encode cost is ~50 μs per tick event, ~100 μs per inference event. Negligible in a 33 ms tick budget. - -**Naming convention**: Scalar fields use OTel-compatible names (`physicalai.runtime.loop_duration_s`, `physicalai.runtime.inference_latency_s`) so a future OTLP exporter in the observer can re-export without renaming. - -### New Files - -```text -physicalai/src/physicalai/runtime/ -├── _telemetry.py # TelemetryEmitter (zenoh publisher, no-op fallback) -└── observer/ - ├── __init__.py - ├── __main__.py # python -m physicalai.runtime.observer - ├── _subscriber.py # TelemetrySubscriber (zenoh sub + dispatch) - ├── _console.py # Live console dashboard - └── _recorder.py # JSONL file sink for offline replay -``` - -### Dependency - -```toml -# pyproject.toml -[project.optional-dependencies] -telemetry = ["zenoh>=1.0", "msgpack>=1.0"] -``` - -zenoh is optional. `TelemetryEmitter` gracefully degrades to no-op when zenoh is not installed. Observer process requires `physicalai[telemetry]`. - -### ActionQueue Change - -Add `peek_remaining()` — returns a copy of queued actions as `(R, D)` array without consuming them. Called once per inference event (not per tick) to snapshot the post-smooth future trajectory. - -```python -def peek_remaining(self) -> np.ndarray | None: - """Return copy of remaining actions without consuming them. Thread-safe.""" - with self._lock: - if not self._deque: - return None - return np.stack(list(self._deque)) -``` - -### Integration Points - -**PolicyRuntime** (4 touch points in `runtime.py`): - -1. Accept optional `telemetry: TelemetryEmitter | None` in `__init__` -2. `emit_lifecycle("start")` at top of `run()` -3. `emit_tick(...)` at end of each tick -4. `emit_lifecycle("shutdown")` in `_shutdown()` - -**AsyncExecution** (1 touch point in `execution.py`): - -1. Accept optional `telemetry: TelemetryEmitter | None` in `__init__` -2. `emit_inference(...)` after `push_chunk()` in the `_run()` thread - -No changes to `smoothers.py` or `__init__.py` exports. `TelemetryEmitter` is internal wiring, not user-facing API. - -### Out of Scope - -- Remote telemetry (zenoh is network-transparent; enabling it is config, not code) -- Camera frame streaming (stays on iceoryx2) -- OpenTelemetry / OTLP exporter (future observer plugin) -- Studio UI WebSocket bridge -- Model input snapshot recording (expensive, needs opt-in design) -- Smoothing delta visualization (derivable in observer from `inference.chunk` vs `tick.action_sent`) - ---- - -## Phase 3.6: Fault Tolerance (1 day) - -Robot connections over USB/serial and camera feeds are fragile. USB hubs lose power, serial timeouts occur, cameras drop frames. The runtime must not crash on transient hardware errors — it must retry and recover, or degrade gracefully. - -### Current Problem - -`PolicyRuntime._build_model_input()` calls `robot.get_observation()` and `cam.read_latest()` with no error handling. `robot.send_action()` is also unprotected. Any `ConnectionError`, `OSError`, or `serial.SerialException` from the SO-101 serial bus kills the control loop. - -### Design Principles - -1. **Never crash the loop on a transient error.** Retry the operation. If retries are exhausted, hold position and log — do not raise. -2. **Distinguish transient vs fatal.** USB disconnect that resolves after replug = transient. `ValueError` from wrong action shape = fatal (programmer error). `KeyboardInterrupt` = always propagate. -3. **No silent degradation.** Every recovery must log a warning and emit a telemetry lifecycle event. The operator must know the robot hiccupped. -4. **Bound retry duration.** Retries must not exceed the tick budget. If `get_observation()` fails 3 times within the tick, use the last known observation and move on. -5. **Camera failure ≠ robot failure.** A dropped camera frame should use the last known frame (stale but safe). A robot communication failure is more serious but still retryable. - -### Implementation: `_resilient_observe()` and `_resilient_send()` - -Replace the raw calls in the control loop with retry-wrapped variants: - -```python -_MAX_OBS_RETRIES = 3 -_MAX_SEND_RETRIES = 2 -_RETRY_BACKOFF_S = 0.001 # 1ms between retries — bounded, won't bust 33ms budget - -def _resilient_observe(self) -> dict[str, Any]: - """Build model input with retry on transient hardware errors.""" - # Robot observation - robot_obs = None - for attempt in range(_MAX_OBS_RETRIES): - try: - robot_obs = self._robot.get_observation() - self._consecutive_error_ticks = 0 - break - except (ConnectionError, OSError) as e: - logger.warning("Robot observation failed (attempt %d/%d): %s", - attempt + 1, _MAX_OBS_RETRIES, e) - if self._telemetry: - self._telemetry.emit_lifecycle("obs_error", error=str(e), - attempt=attempt + 1) - if attempt < _MAX_OBS_RETRIES - 1: - time.sleep(_RETRY_BACKOFF_S) - - if robot_obs is None: - self._consecutive_error_ticks += 1 - if self._consecutive_error_ticks >= self._max_consecutive_error_ticks: - msg = (f"Robot unreachable for {self._consecutive_error_ticks} consecutive ticks " - f"({self._consecutive_error_ticks / self._fps:.1f}s)") - if self._telemetry: - self._telemetry.emit_lifecycle("connection_lost", error=msg) - raise ConnectionError(msg) - if self._last_robot_obs is not None: - logger.warning("Using stale robot observation (error tick %d/%d)", - self._consecutive_error_ticks, - self._max_consecutive_error_ticks) - robot_obs = self._last_robot_obs - self._stale_obs_ticks += 1 - else: - raise ConnectionError("Robot observation failed and no fallback available") - self._last_robot_obs = robot_obs - - # Camera frames — independent per camera, each can fail independently - camera_frames: dict[str, Frame] = {} - for name, cam in self._cameras.items(): - try: - camera_frames[name] = cam.read_latest() - self._last_camera_frames[name] = camera_frames[name] - except (ConnectionError, OSError) as e: - logger.warning("Camera '%s' read failed: %s — using last frame", name, e) - if name in self._last_camera_frames: - camera_frames[name] = self._last_camera_frames[name] - self._stale_obs_ticks += 1 - - return self._obs_to_input(robot_obs, camera_frames) - - -def _resilient_send(self, action: np.ndarray) -> None: - """Send action with retry on transient errors.""" - for attempt in range(_MAX_SEND_RETRIES): - try: - self._robot.send_action(action) - self._consecutive_error_ticks = 0 - return - except (ConnectionError, OSError) as e: - logger.warning("send_action failed (attempt %d/%d): %s", - attempt + 1, _MAX_SEND_RETRIES, e) - if self._telemetry: - self._telemetry.emit_lifecycle("send_error", error=str(e), - attempt=attempt + 1) - if attempt < _MAX_SEND_RETRIES - 1: - time.sleep(_RETRY_BACKOFF_S) - self._consecutive_error_ticks += 1 - self._transient_errors += 1 - logger.error("send_action failed after %d attempts — skipping tick", - _MAX_SEND_RETRIES) -``` - -### Error Classification - -| Exception type | Source | Treatment | -| ---------------------------- | --------------------------------------------------------- | --------------------------------- | -| `ConnectionError` | SO-101 serial port closed, servo not responding | Retry, then stale fallback | -| `OSError` | USB disconnect, file descriptor error, camera device lost | Retry, then stale fallback | -| `TimeoutError` | Serial read timeout (subclass of `OSError`) | Retry, then stale fallback | -| `ValueError`, `RuntimeError` | Wrong action shape, uncalibrated mode, programming error | **Fatal** — propagate immediately | -| `WorkerDiedError` | Inference thread crash | **Fatal** — propagate immediately | -| `KeyboardInterrupt` | User Ctrl+C | **Always propagate** | - -### State Tracking - -`PolicyRuntime` gains fields for stale-observation fallback and error escalation: - -```python -self._last_robot_obs: RobotObservation | None = None -self._last_camera_frames: dict[str, Frame] = {} -self._consecutive_error_ticks: int = 0 -self._max_consecutive_error_ticks: int = int(3 * fps) # ~3 seconds -self._stale_obs_ticks: int = 0 -self._transient_errors: int = 0 -``` - -`_consecutive_error_ticks` is reset to 0 on any successful `get_observation()` or `send_action()`. It increments when all retries within a tick fail. When it reaches `max_consecutive_error_ticks`, the runtime raises `ConnectionError`. - -`RunStats` gains fault tolerance metrics: - -```python -@dataclass(frozen=True) -class RunStats: - steps: int - total_pops: int - total_holds: int - inference_count: int - transient_errors: int # total retried hardware errors - stale_obs_ticks: int # ticks where stale observation was used -``` - -The telemetry `tick` event gains a `stale_obs: bool` field so the observer can flag degraded ticks. - -### Warmup Resilience - -Warmup is the most fragile moment — USB may not be fully enumerated, servos may still be initializing. The first call to `_build_model_input()` has no stale fallback. - -Warmup gets its own retry loop with longer timeout: - -```python -_WARMUP_RETRIES = 5 -_WARMUP_BACKOFF_S = 1.0 # 1 second between warmup retries - -def _warmup_with_retry(self) -> None: - for attempt in range(_WARMUP_RETRIES): - try: - sample_obs = self._build_model_input() - self._execution.warmup(sample_obs) - return - except (ConnectionError, OSError) as e: - logger.warning("Warmup failed (attempt %d/%d): %s", - attempt + 1, _WARMUP_RETRIES, e) - if attempt < _WARMUP_RETRIES - 1: - time.sleep(_WARMUP_BACKOFF_S) - msg = f"Warmup failed after {_WARMUP_RETRIES} attempts — robot or cameras unreachable" - raise ConnectionError(msg) -``` - -Total warmup retry budget: 5 seconds. Long enough for USB enumeration, short enough to not confuse the user. - -### Reconnection - -Retry within a tick handles brief glitches (serial timeout, dropped USB packet). For sustained disconnects (USB cable pulled), the robot's `is_connected()` returns `False` and consecutive retries will keep failing. - -Full reconnection (calling `robot.disconnect()` then `robot.connect()`) is **not** done automatically in the runtime. Reconnecting a serial bus mid-loop is dangerous — it resets servo state, torque settings, and calibration. This must be an explicit user decision. - -Instead, the runtime: - -1. Logs an error with reconnection instructions -2. Continues holding position (stale obs + last action) -3. Emits `lifecycle("connection_lost")` telemetry event -4. After `max_consecutive_errors` (default: `3 * fps` = 3 seconds of failures), raises `ConnectionError` with a clear message - -This gives the user time to replug USB without the loop crashing, but doesn't silently run for minutes with a dead robot. - -### Camera Recovery - -Cameras are more resilient than robots — `SharedCamera` (iceoryx2) handles publisher death and re-spawn. Direct camera backends (`RealsenseCamera`, `UVCCamera`) may need explicit reconnection. - -The runtime treats camera failure as soft: use last frame, log warning, continue. If a camera has never produced a frame (failure on first read), omit it from the model input dict. The model's behavior with missing camera keys is model-specific — not the runtime's concern. - -### Tests - -```text -physicalai/tests/unit/runtime/ -└── test_fault_tolerance.py # Mock robot that raises on get_observation/send_action: - # - transient error → retry succeeds - # - sustained error → stale fallback - # - fatal error → propagates - # - camera failure → stale frame used - # - max_consecutive_errors → ConnectionError raised -``` - ---- - -## Phase 4: Advanced (later) - -1. `AsyncExecution(transport="process")` for PyTorch CPU (GIL contention) -2. RTC guidance correction (requires split export from `library/`) -3. `RemoteExecution` + gRPC `PolicyServer` (see [policy_server_design.md](./policy_server_design.md)) - ---- - -## Component Mapping: inference_async.py → Library - -| Script component | Library target | Notes | -| ------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------- | -| `QueueMixer` | `ActionQueue` + `LerpSmoother` | Offset-aware blending extracted into smoother | -| `QueueMixer.lerp_duration = max(offset, 1)` | `LerpSmoother.merge()` computes from offset | Stateless — `duration_frames` is fallback when offset=0 | -| `InferenceThread` | `AsyncExecution` | Same thread architecture: obs_slot + result push | -| `InferenceThread.force_reset()` | `AsyncExecution._force_reset()` | Clears stuck state | -| `InferenceThread.busy_duration` | `AsyncExecution._busy_duration` | Watchdog timeout trigger | -| `InferenceThread.alive` | `AsyncExecution.alive` | Dead thread detection | -| `get_full_observation()` | `default_observation_to_input()` + `obs_to_input` callable | Separates robot obs format from model input format | -| `action_to_robot_dict()` | `Robot.send_action(ndarray)` | Robot protocol handles conversion | -| `main()` while-loop | `PolicyRuntime.run()` | Same 5-step structure | -| Velocity clamping + ramp | User callback or example code | Too robot-specific for core runtime | -| Camera discovery + `SharedCamera` | User code / CLI (Phase 3) | App-specific | -| Warm-up inference + queue seeding | `Execution.warmup()` | Seeds queue so loop starts with actions | -| `inference_thread.alive` check + restart | `AsyncExecution.maybe_request()` raises `WorkerDiedError` | Raise instead of silent restart | -| `force_reset()` for stuck thread | `AsyncExecution._force_reset()` via watchdog | Auto-triggered when `busy_duration > timeout` | -| Hold position + `hold_count` | `ActionQueue.consecutive_holds` + `PolicyRuntime` logging | Telemetry via queue counters | -| Two-backend support (torch/OV) | `InferenceModel` abstraction | Runtime only calls `predict_action_chunk()` | -| (none — ad-hoc print/log) | `TelemetryEmitter` + zenoh pub-sub | Fire-and-forget, no-op when zenoh absent (Phase 3.5) | -| (none — loop crashes on USB error) | `_resilient_observe()` / `_resilient_send()` | Retry + stale fallback for transient errors (Phase 3.6) | - ---- - -## Design Gaps Addressed - -Gaps identified during architecture review, with resolutions. - -### Gap 1: Observation ownership in async - -**Problem**: Main thread passes observation dict to `maybe_request()`. If main thread reuses camera buffers, inference thread reads corrupted data. - -**Resolution**: `AsyncExecution.maybe_request()` performs defensive copy before submitting: - -```python -snapshot = { - k: v.copy() if isinstance(v, np.ndarray) else v - for k, v in observation.items() -} -``` - -Cost: one dict of numpy copies per inference request (not per tick — only when threshold triggers). At 30fps with threshold=0.5 and chunk_size=50, that's roughly once every ~0.8s. - -### Gap 2: Empty-queue telemetry - -**Problem**: "Hold last action" silently masks queue starvation. - -**Resolution**: `ActionQueue` tracks `consecutive_holds` and `total_holds`. `PolicyRuntime` logs warnings on first hold and every `fps` consecutive holds (1 per second). `on_hold` callback exposes starvation events to user code. - -### Gap 3: Graceful shutdown - -**Problem**: Hard stop can jerk the robot. - -**Resolution**: `PolicyRuntime._shutdown()` drains up to 1 second of remaining actions at loop FPS. Beyond 1s, hard stop — the user pressed Ctrl+C for a reason. Robot and cameras stay connected here; caller owns connect/disconnect lifecycle. - -### Gap 4: Error propagation - -**Problem**: Inference thread exceptions silently swallowed — `pop_action()` returns None forever. - -**Resolution**: `AsyncExecution` stores `_death_cause` on thread exception. `maybe_request()` checks `alive` and raises `WorkerDiedError` with original traceback preserved via `raise ... from`. PolicyRuntime catches and re-raises. - -### Gap 5: Two-backend support - -**Problem**: inference_async.py handles PyTorch and OpenVINO with different preprocessing. - -**Resolution**: Not a runtime concern. `InferenceModel` abstracts backend differences. Runtime only calls `model.predict_action_chunk(obs)`. The torch bypass in inference_async.py exists because the script bypasses `InferenceModel` for direct Pi05 policy access — the library runtime won't need this. - ---- - -## Resolved Questions - -1. **`PolicyRuntime.run()` returns `RunStats`.** `@dataclass` with 4 core fields (steps, total_pops, total_holds, inference_count) plus 2 fault tolerance fields (transient_errors, stale_obs_ticks) added in Phase 3.6. Useful for testing and logging. - -2. **Warm-up happens inside `run()`.** User doesn't forget, no "did I call warmup?" failure mode. - -3. **Dead worker raises `WorkerDiedError`.** Let caller decide recovery strategy. Auto-restart can mask systematic failures. - ---- - -## Relationship to Existing Design Docs - -This plan refines [policy_runtime_design.md](./policy_runtime_design.md). Key differences: - -| Topic | Original design doc | This plan | -| ------------------------------- | ---------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | -| ActionQueue visibility | Public parameter on PolicyRuntime | Public parameter with sensible default. Internal `_action_queue.py` module, exported via `__init__`. | -| Execution ABC | `start(action_queue, model)`, `maybe_request(obs, action_queue)` | `start(model, action_queue)`, `maybe_request(obs)` — queue stored internally on start, not passed per call | -| Warmup | `warmup(sample_observation, n=2)` | `warmup(sample_observation)` — one call, seeds queue | -| Health monitoring | Not specified | First-class: alive, busy, busy_duration, WorkerDiedError, watchdog | -| Smoothing | `LerpChunkSmoother`, `ReplaceMerger` | `LerpSmoother`, `ReplaceSmoother` — stateless merge, offset-aware | -| Observation bridge | Not specified | `obs_to_input` callable with `default_observation_to_input` fallback | -| `predict_action_chunk()` return | `Mapping[str, Any]` with `"actions"` key | `np.ndarray` directly (matches actual implementation) | -| Bug fixes | Not applicable | Phase 1 prerequisite — bugs 1-3 on critical path | -| Telemetry | Not specified | Phase 3.5: zenoh pub-sub emitter (fire-and-forget), separate observer process, optional dep | -| Fault tolerance | Not specified | Phase 3.6: retry + stale fallback for transient HW errors, error classification, bounded retry | - -All ownership rules, boundary constraints, and deferred-until-needed decisions from the original design doc remain in effect. From c79b0e9537954ffd675f7561a2c06dd59f7cb6e2 Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Sun, 17 May 2026 22:14:24 +0200 Subject: [PATCH 05/19] fix style --- src/physicalai/robot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/physicalai/robot/__init__.py b/src/physicalai/robot/__init__.py index 110f90a..d2159e9 100644 --- a/src/physicalai/robot/__init__.py +++ b/src/physicalai/robot/__init__.py @@ -19,9 +19,9 @@ __all__ = [ "Robot", + "RobotObservation", "connect", "verify_robot", - "RobotObservation", ] From e6a986365f5bd80200abe540e97db1963fd8773f Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Sun, 17 May 2026 22:22:39 +0200 Subject: [PATCH 06/19] remove outdated example --- examples/runtime/inference_async.py | 764 ---------------------------- 1 file changed, 764 deletions(-) delete mode 100644 examples/runtime/inference_async.py diff --git a/examples/runtime/inference_async.py b/examples/runtime/inference_async.py deleted file mode 100644 index 9eade0a..0000000 --- a/examples/runtime/inference_async.py +++ /dev/null @@ -1,764 +0,0 @@ -# pyrefly: ignore-errors -# Copyright (C) 2026 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Async threaded inference for SO-101 with lerp-blended action chunking. - -Supports two backends: -- **PyTorch/CUDA**: Pi05 checkpoint (.ckpt file) — GPU inference via select_action. -- **OpenVINO**: Exported model directory — CPU/GPU inference via InferenceModel. - -The backend is auto-detected from the checkpoint path: -- File ending in .ckpt → PyTorch Pi05 -- Directory → OpenVINO InferenceModel - -The main thread runs the robot control loop at a fixed FPS. -A background thread runs model inference and pushes action chunks -into a shared queue with lerp blending for smooth transitions. - -Uses LeRobot SO101Follower (degrees) for robot control and -physicalai SharedCamera (iceoryx2) for camera input. - -Usage (PyTorch): - python inference_async_pi05.py \ - --model-path pi05/pi05_eugene.ckpt \ - --device cuda \ - --robot-port /dev/ttyACM1 \ - --task "pick a can and place it in a bowl" - -Usage (OpenVINO): - python inference_async_pi05.py \ - --model-path exports/pi05_ov \ - --device CPU \ - --robot-port /dev/ttyACM1 \ - --camera-map overhead=top arm=wrist \ - --task "pick a can and place it in a bowl" -""" - -from __future__ import annotations - -import argparse -import logging -import threading -import time -from pathlib import Path - -import cv2 -import numpy as np - -from lerobot.robots.so_follower import SO101Follower, SO101FollowerConfig -from physicalai.capture import SharedCamera -from physicalai.capture.discovery import DeviceInfo, discover_all - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s [%(threadName)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) - -JOINT_NAMES = [ - "shoulder_pan", - "shoulder_lift", - "elbow_flex", - "wrist_flex", - "wrist_roll", - "gripper", -] - -IMAGE_WIDTH = 640 -IMAGE_HEIGHT = 480 -CAMERA_FPS = 30 - - -# --------------------------------------------------------------------------- -# Queue Mixer — lerp-blends old and new action chunks -# --------------------------------------------------------------------------- - -class QueueMixer: - """Action queue with linear-interpolation blending between chunks. - - When a new chunk arrives while old actions remain, the overlapping - region is lerp-blended so the robot doesn't jerk. An *offset* - parameter lets us skip the first N actions of the new chunk to - compensate for inference latency. - """ - - def __init__(self, lerp_duration: int = 5) -> None: - self.queue: np.ndarray | None = None - self.lerp_duration = lerp_duration - self.index = 0 - - def add(self, chunk: np.ndarray, offset: int = 0) -> None: - """Merge *chunk* (H, action_dim) into the queue.""" - if self.queue is None or self.index >= len(self.queue): - self.queue = chunk[offset:] - self.index = 0 - logger.info( - "[QueueMixer] First/exhausted → replaced queue (len=%d, offset=%d)", - len(self.queue), offset, - ) - return - - remaining = self.queue[self.index:] - incoming = chunk[offset:] - n_remain = len(remaining) - lerp_dur = min(n_remain, self.lerp_duration) - - weights = np.maximum(1.0 - np.arange(n_remain) / max(lerp_dur, 1), 0.0) - weights = weights[:, np.newaxis] - - n_blend = min(n_remain, len(incoming)) - blended = weights[:n_blend] * remaining[:n_blend] + (1.0 - weights[:n_blend]) * incoming[:n_blend] - - self.queue = np.concatenate([blended, incoming[n_blend:]], axis=0).astype(np.float32) - self.index = 0 - logger.info( - "[QueueMixer] Blended chunk (blended=%d, appended=%d, total=%d, lerp=%d, offset=%d)", - n_blend, max(len(incoming) - n_blend, 0), len(self.queue), lerp_dur, offset, - ) - - def pop(self) -> np.ndarray | None: - """Pop the next action, or return None if empty.""" - if self.queue is None or self.index >= len(self.queue): - return None - action = self.queue[self.index] - self.index += 1 - return action - - @property - def remaining(self) -> int: - if self.queue is None: - return 0 - return max(len(self.queue) - self.index, 0) - - @property - def empty(self) -> bool: - return self.remaining == 0 - - -# --------------------------------------------------------------------------- -# Inference Thread (PyTorch / CUDA) -# --------------------------------------------------------------------------- - -class InferenceThread: - """Background thread that runs model inference on demand. - - Supports two backends: - - "torch": Pi05 GPU inference via select_action + action_queue draining. - - "openvino": InferenceModel inference via model(dict)["action"]. - - The control loop writes an observation dict into `obs_slot` and sets - `obs_ready`. The inference thread picks it up, runs inference, and - pushes the result (chunk + timing) into `result_slot`. - """ - - def __init__( - self, - model: object, - backend: str, - device: str, - task: str, - flip_cameras: set[str], - camera_name_map: dict[str, str] | None = None, - blank_cameras: list[str] | None = None, - ) -> None: - self.model = model - self.backend = backend - self.device = device - self.task = task - self.flip_cameras = flip_cameras - self.camera_name_map = camera_name_map or {} - self.blank_cameras = blank_cameras or [] - - self._lock = threading.Lock() - self._obs_slot: dict | None = None - self._obs_ready = threading.Event() - self._running_inference = False - self._request_time = 0.0 - - self._result_lock = threading.Lock() - self._result_slot: tuple[np.ndarray, float] | None = None - - self._stop = threading.Event() - self._thread = threading.Thread(target=self._run, name="InferenceThread", daemon=True) - self.inference_count = 0 - - def start(self) -> None: - logger.info("[InferenceThread] Starting background thread") - self._thread.start() - - def stop(self) -> None: - logger.info("[InferenceThread] Requesting stop") - self._stop.set() - self._obs_ready.set() - self._thread.join(timeout=10.0) - logger.info("[InferenceThread] Stopped (ran %d inferences)", self.inference_count) - - def request(self, observation: dict) -> bool: - """Submit an observation dict for inference. Returns False if busy.""" - with self._lock: - if self._obs_slot is not None or self._running_inference: - return False - self._obs_slot = observation - self._request_time = time.perf_counter() - self._obs_ready.set() - logger.debug("[InferenceThread] Observation submitted") - return True - - @property - def busy(self) -> bool: - with self._lock: - return self._obs_slot is not None or self._running_inference - - @property - def alive(self) -> bool: - return self._thread.is_alive() - - @property - def busy_duration(self) -> float: - """Seconds since last request was submitted. 0 if not busy.""" - with self._lock: - if not (self._obs_slot is not None or self._running_inference): - return 0.0 - return time.perf_counter() - self._request_time - - def force_reset(self) -> None: - """Clear stuck state so new requests can be submitted.""" - with self._lock: - self._obs_slot = None - self._running_inference = False - logger.warning("[InferenceThread] Force reset — cleared stuck state") - - def get_result(self) -> tuple[np.ndarray, float] | None: - """Non-blocking fetch of the latest result. Returns (chunk, latency_s) or None.""" - with self._result_lock: - r = self._result_slot - self._result_slot = None - return r - - def _build_torch_observation(self, obs_dict: dict) -> object: - """Convert raw observation dict to Pi05 Observation on device (torch).""" - import torch - from physicalai.data.observation import Observation - - state = torch.tensor( - [[obs_dict[f"{jn}.pos"] for jn in JOINT_NAMES]], - dtype=torch.float32, - device=self.device, - ) - - images = {} - for cam_key in [k for k in obs_dict if isinstance(obs_dict[k], np.ndarray) and obs_dict[k].ndim == 3]: - img = np.ascontiguousarray(obs_dict[cam_key]) - if cam_key in self.flip_cameras: - img = cv2.rotate(img, cv2.ROTATE_180) - t = torch.from_numpy(img.copy()).float() / 255.0 - t = t.permute(2, 0, 1).unsqueeze(0) - images[cam_key] = t.to(self.device) - - return Observation(state=state, images=images, task=self.task) - - def _build_ov_observation(self, obs_dict: dict) -> dict[str, np.ndarray]: - """Convert raw observation dict to InferenceModel input (numpy).""" - state = np.array([[obs_dict[f"{jn}.pos"] for jn in JOINT_NAMES]], dtype=np.float32) - observation: dict[str, np.ndarray] = {"state": state, "task": [self.task]} - - for cam_key in [k for k in obs_dict if isinstance(obs_dict[k], np.ndarray) and obs_dict[k].ndim == 3]: - img = np.ascontiguousarray(obs_dict[cam_key]) - if cam_key in self.flip_cameras: - img = cv2.rotate(img, cv2.ROTATE_180) - img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT)) - img = img.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 - model_name = self.camera_name_map.get(cam_key, cam_key) - observation[f"images.{model_name}"] = img - - for blank_cam in self.blank_cameras: - observation[f"images.{blank_cam}"] = np.zeros( - (1, 3, IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.float32, - ) - - return observation - - def _infer_torch(self, obs_dict: dict) -> np.ndarray: - """Run Pi05 inference and return the full action chunk as numpy.""" - import torch - from collections import deque - - observation = self._build_torch_observation(obs_dict) - self.model._action_queue = deque() - - with torch.no_grad(): - action = self.model.select_action(observation) - if self.device == "cuda": - torch.cuda.synchronize() - - actions = [action.detach().cpu().numpy()] - while len(self.model._action_queue) > 0: - a = self.model._action_queue.popleft() - actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a) - - return np.array([np.squeeze(a) for a in actions], dtype=np.float32) - - def _infer_ov(self, obs_dict: dict) -> np.ndarray: - """Run OpenVINO inference and return the full action chunk as numpy.""" - model_input = self._build_ov_observation(obs_dict) - output = self.model(model_input)["action"] - chunk = np.squeeze(output) - if chunk.ndim == 1: - chunk = chunk[np.newaxis, :] - return chunk.astype(np.float32) - - def _run(self) -> None: - logger.info("[InferenceThread] Thread started, waiting for observations") - while not self._stop.is_set(): - self._obs_ready.wait() - self._obs_ready.clear() - - if self._stop.is_set(): - break - - with self._lock: - obs_dict = self._obs_slot - self._obs_slot = None - self._running_inference = True - - if obs_dict is None: - with self._lock: - self._running_inference = False - continue - - self.inference_count += 1 - logger.info("[InferenceThread] >>> Inference #%d START", self.inference_count) - t0 = time.perf_counter() - - try: - if self.backend == "torch": - chunk = self._infer_torch(obs_dict) - else: - chunk = self._infer_ov(obs_dict) - except Exception: - logger.exception("[InferenceThread] Inference failed") - with self._lock: - self._running_inference = False - continue - - latency = time.perf_counter() - t0 - logger.info( - "[InferenceThread] <<< Inference #%d DONE latency=%.1fms chunk_shape=%s", - self.inference_count, latency * 1000, chunk.shape, - ) - - with self._result_lock: - self._result_slot = (chunk, latency) - with self._lock: - self._running_inference = False - - -# --------------------------------------------------------------------------- -# Observation helpers -# --------------------------------------------------------------------------- - -def get_full_observation( - robot: SO101Follower, - cameras: dict[str, SharedCamera], -) -> dict: - """Combine robot state + SharedCamera images into one dict. - - LeRobot SO101Follower returns joint values in the correct - model domain (body joints in degrees, gripper in [0, 100]). - """ - obs = robot.get_observation() - for cam_key, cam in cameras.items(): - frame = cam.read_latest() - obs[cam_key] = frame.data - return obs - - -def action_to_robot_dict(action: np.ndarray) -> dict[str, float]: - """Convert action array to a LeRobot send_action dict.""" - return {f"{jn}.pos": float(action[i]) for i, jn in enumerate(JOINT_NAMES)} - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - -# --------------------------------------------------------------------------- -# Camera discovery -# --------------------------------------------------------------------------- - -def select_cameras() -> dict[str, str]: - """Discover cameras and interactively ask user to assign overhead/arm.""" - print("\n[cameras] Discovering cameras ...", flush=True) - - all_devices = discover_all() - - # Flatten into a single list (deduplicate across drivers by hardware_id) - seen: set[str] = set() - devices: list[DeviceInfo] = [] - for driver_devices in all_devices.values(): - for dev in driver_devices: - key = dev.hardware_id or f"{dev.driver}:{dev.device_id}" - if key not in seen: - seen.add(key) - devices.append(dev) - - if not devices: - raise RuntimeError("No cameras found. Check connections and permissions.") - - print(f"\n Found {len(devices)} camera(s):", flush=True) - for i, dev in enumerate(devices): - stable_id = dev.hardware_id or "" - print(f" [{i}] {dev.name or dev.model or 'Unknown'} ({stable_id or dev.device_id})", flush=True) - - cameras: dict[str, str] = {} - for role in ("overhead", "arm"): - while True: - choice = input(f"\n Select {role} camera (0-{len(devices)-1}), or 's' to skip: ").strip() - if choice.lower() == "s": - print(f" Skipping {role} camera", flush=True) - break - if choice.isdigit() and 0 <= int(choice) < len(devices): - dev = devices[int(choice)] - # Prefer stable by-id path, fall back to device_id - path = dev.hardware_id if dev.hardware_id else dev.device_id - cameras[role] = path - print(f" {role} → {path}", flush=True) - break - print(f" Invalid choice. Enter 0-{len(devices)-1} or 's'.") - - if not cameras: - raise RuntimeError("At least one camera must be selected.") - - return cameras - - -def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description="Async threaded SO-101 inference (PyTorch or OpenVINO).") - p.add_argument("--model-path", type=str, required=True, - help="Path to Pi05 checkpoint (.ckpt) or OpenVINO export directory") - p.add_argument("--device", type=str, default="cuda", - help="'cuda' or 'cpu' for torch; 'CPU', 'GPU', 'NPU' for OpenVINO") - p.add_argument("--robot-port", type=str, default="/dev/ttyACM1") - p.add_argument("--robot-id", type=str, default="my_so101_follower") - p.add_argument("--overhead-camera", type=str, default=None, - help="UVC device path for overhead camera (skip interactive selection)") - p.add_argument("--arm-camera", type=str, default=None, - help="UVC device path for arm camera (skip interactive selection)") - p.add_argument("--task", type=str, default="pick a can and place it in a bowl") - p.add_argument("--fps", type=int, default=30) - p.add_argument("--queue-threshold", type=float, default=0.5, - help="Request new inference when queue drops below this fraction of chunk size.") - p.add_argument("--lerp-duration", type=int, default=5, - help="Number of frames over which to blend old/new chunks.") - p.add_argument("--flip-cameras", nargs="*", default=[], - help="Camera names to rotate 180 degrees.") - p.add_argument("--infer-timeout", type=float, default=15.0, - help="Seconds before force-resetting a stuck inference thread.") - p.add_argument("--max-speed", type=float, default=90.0, - help="Max joint speed in degrees/second. Clamps action deltas per step.") - p.add_argument("--ramp-steps", type=int, default=30, - help="Number of initial steps to ramp up speed (starts at 1/4 max-speed).") - p.add_argument("--no-compile", action="store_true", default=True, - help="Disable torch.compile (default: disabled, torch only)") - # OpenVINO-specific options - p.add_argument("--camera-map", nargs="*", - help="Map camera names to model input names, e.g. overhead=top arm=wrist (OV only)") - p.add_argument("--blank-cameras", nargs="*", - help="Model camera inputs to fill with zeros, e.g. side (OV only)") - return p.parse_args() - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -def main(args: argparse.Namespace) -> None: - model_path = Path(args.model_path) - if not model_path.exists(): - raise FileNotFoundError(f"Model not found: {model_path}") - - # Auto-detect backend - if model_path.is_dir(): - backend = "openvino" - # Default device for OpenVINO is CPU when user left the torch default - if args.device in ("cuda", "cuda:0"): - print(f"[init] Warning: OpenVINO backend does not support CUDA. Falling back to CPU.", flush=True) - args.device = "CPU" - else: - args.device = args.device.upper() - elif model_path.suffix == ".ckpt": - backend = "torch" - else: - raise ValueError( - f"Cannot determine backend from '{model_path}'. " - "Use a .ckpt file for PyTorch or a directory for OpenVINO." - ) - - flip_cameras = set(args.flip_cameras or []) - camera_name_map = dict(kv.split("=") for kv in (args.camera_map or [])) - blank_cameras = list(args.blank_cameras or []) - - print(f"[init] Model: {model_path}", flush=True) - print(f"[init] Backend: {backend}", flush=True) - print(f"[init] Device: {args.device}", flush=True) - print(f"[init] Flip cameras: {flip_cameras}", flush=True) - if backend == "openvino": - print(f"[init] Camera map: {camera_name_map}", flush=True) - print(f"[init] Blank cameras: {blank_cameras}", flush=True) - - # --- Set up cameras --- - # Use CLI paths if provided, otherwise run interactive selection - if args.overhead_camera or args.arm_camera: - camera_paths: dict[str, str] = {} - if args.overhead_camera: - camera_paths["overhead"] = args.overhead_camera - if args.arm_camera: - camera_paths["arm"] = args.arm_camera - else: - camera_paths = select_cameras() - - cameras: dict[str, SharedCamera] = {} - for cam_name, cam_device in camera_paths.items(): - cameras[cam_name] = SharedCamera( - "uvc", device=cam_device, - width=IMAGE_WIDTH, height=IMAGE_HEIGHT, fps=CAMERA_FPS, - ) - - for name, cam in cameras.items(): - cam.connect() - print(f"[init] Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps", flush=True) - - # --- Load model --- - if backend == "torch": - import torch - from physicalai.policies.pi05 import Pi05 - - print("[init] Loading Pi05 model (map_location=cpu) ...", flush=True) - model = Pi05.load_from_checkpoint( - str(model_path), map_location="cpu", compile_model=not args.no_compile, - ) - print("[init] Checkpoint loaded to CPU, moving to device ...", flush=True) - model = model.to(args.device) - model.eval() - print(f"[init] Model loaded on {args.device}", flush=True) - if args.device == "cuda": - print(f"[init] VRAM used: {torch.cuda.memory_allocated() / 1e9:.2f} GB", flush=True) - else: - import openvino_tokenizers # noqa: F401 - from physicalai.inference import InferenceModel - from physicalai.inference.runners import SinglePass - - print(f"[init] Loading OpenVINO model from {model_path} ...", flush=True) - model = InferenceModel( - export_dir=str(model_path), - device=args.device, - runner=SinglePass(), - ) - print("[init] Model loaded", flush=True) - - - # --- Connect robot (LeRobot SO101Follower, no cameras) --- - print(f"[init] Connecting robot on {args.robot_port} ...", flush=True) - robot_cfg = SO101FollowerConfig( - port=args.robot_port, - id=args.robot_id, - ) - robot = SO101Follower(robot_cfg) - robot.connect() - print(f"[init] Robot connected (id={args.robot_id})", flush=True) - - # --- Warm-up inference to determine chunk size --- - print("[init] Getting warm-up observation ...", flush=True) - warmup_obs = get_full_observation(robot, cameras) - print(f"[init] Got observation, keys: {list(warmup_obs.keys())}", flush=True) - - print("[init] Running warm-up inference ...", flush=True) - t0 = time.perf_counter() - - if backend == "torch": - import torch - from collections import deque - from physicalai.data.observation import Observation - - state = torch.tensor( - [[warmup_obs[f"{jn}.pos"] for jn in JOINT_NAMES]], - dtype=torch.float32, device=args.device, - ) - warmup_images = {} - for cam_key in cameras: - img = np.ascontiguousarray(warmup_obs[cam_key]) - if cam_key in flip_cameras: - img = cv2.rotate(img, cv2.ROTATE_180) - t = torch.from_numpy(img.copy()).float() / 255.0 - t = t.permute(2, 0, 1).unsqueeze(0).to(args.device) - warmup_images[cam_key] = t - - warmup_observation = Observation(state=state, images=warmup_images, task=args.task) - model._action_queue = deque() - with torch.no_grad(): - warmup_action = model.select_action(warmup_observation) - if args.device == "cuda": - torch.cuda.synchronize() - - warmup_actions = [warmup_action.detach().cpu().numpy()] - while len(model._action_queue) > 0: - a = model._action_queue.popleft() - warmup_actions.append(a.detach().cpu().numpy() if isinstance(a, torch.Tensor) else a) - warmup_chunk = np.array([np.squeeze(a) for a in warmup_actions], dtype=np.float32) - else: - # OV warm-up - warmup_state = np.array( - [[warmup_obs[f"{jn}.pos"] for jn in JOINT_NAMES]], dtype=np.float32, - ) - warmup_input: dict[str, np.ndarray] = {"state": warmup_state, "task": [args.task]} - for cam_key in cameras: - img = np.ascontiguousarray(warmup_obs[cam_key]) - if cam_key in flip_cameras: - img = cv2.rotate(img, cv2.ROTATE_180) - img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT)) - img = img.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 - model_name = camera_name_map.get(cam_key, cam_key) - warmup_input[f"images.{model_name}"] = img - for blank_cam in blank_cameras: - warmup_input[f"images.{blank_cam}"] = np.zeros( - (1, 3, IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.float32, - ) - warmup_out = model(warmup_input)["action"] - warmup_chunk = np.squeeze(warmup_out) - if warmup_chunk.ndim == 1: - warmup_chunk = warmup_chunk[np.newaxis, :] - warmup_chunk = warmup_chunk.astype(np.float32) - - warmup_latency = time.perf_counter() - t0 - chunk_size = warmup_chunk.shape[0] - action_dim = warmup_chunk.shape[1] - print(f"[init] Warm-up done: chunk_size={chunk_size}, action_dim={action_dim}, " - f"latency={warmup_latency*1000:.1f}ms", flush=True) - - # --- Init components --- - queue_mixer = QueueMixer(lerp_duration=args.lerp_duration) - queue_mixer.add(warmup_chunk, offset=0) - - inference_thread = InferenceThread( - model, backend, args.device, args.task, flip_cameras, - camera_name_map=camera_name_map, blank_cameras=blank_cameras, - ) - inference_thread.start() - - goal_time = 1.0 / args.fps - threshold = int(chunk_size * args.queue_threshold) - max_delta_per_step = args.max_speed / args.fps # degrees per step - step = 0 - actions_from_queue = 0 - hold_count = 0 - last_action: np.ndarray = warmup_chunk[0] - - # Initialize commanded position from current robot state (degrees/[0,100]) - init_obs = robot.get_observation() - commanded_pos = np.array( - [init_obs[f"{jn}.pos"] for jn in JOINT_NAMES], dtype=np.float32, - ) - print(f"[init] Current joint pos: {commanded_pos}", flush=True) - print(f"[init] Max speed: {args.max_speed:.0f} deg/s → {max_delta_per_step:.1f} per step @ {args.fps}Hz", flush=True) - print(f"[init] Ramp-up: {args.ramp_steps} steps", flush=True) - print("[run] Starting — Ctrl+C to stop", flush=True) - - try: - while True: - loop_start = time.perf_counter() - - # 1. Check for inference results - result = inference_thread.get_result() - if result is not None: - chunk, latency = result - offset = int(latency * args.fps) - logger.info( - "[Control] Got inference #%d: chunk=%s, latency=%.1fms, offset=%d", - inference_thread.inference_count, chunk.shape, latency * 1000, offset, - ) - queue_mixer.add(chunk, offset=offset) - queue_mixer.lerp_duration = max(offset, 1) - - # 2. Maybe request new inference (with stuck/dead detection) - if queue_mixer.remaining <= threshold: - if not inference_thread.alive: - logger.error("[Control] Inference thread DEAD — restarting") - inference_thread = InferenceThread( - model, backend, args.device, args.task, flip_cameras, - camera_name_map=camera_name_map, blank_cameras=blank_cameras, - ) - inference_thread.start() - elif inference_thread.busy_duration > args.infer_timeout: - logger.warning( - "[Control] Inference stuck for %.0fs — force resetting", - inference_thread.busy_duration, - ) - inference_thread.force_reset() - - if not inference_thread.busy: - obs = get_full_observation(robot, cameras) - submitted = inference_thread.request(obs) - if submitted: - logger.info( - "[Control] Requested inference (queue_remaining=%d, threshold=%d)", - queue_mixer.remaining, threshold, - ) - - # 3. Pop action from queue - action = queue_mixer.pop() - if action is not None: - last_action = action - actions_from_queue += 1 - hold_count = 0 - else: - action = last_action - hold_count += 1 - if hold_count == 1 or hold_count % 30 == 0: - logger.warning( - "[Control] Queue empty, holding position (hold_count=%d)", hold_count, - ) - - # 4. Velocity clamp and send - if step < args.ramp_steps: - ramp_frac = 0.25 + 0.75 * (step / args.ramp_steps) - effective_delta = max_delta_per_step * ramp_frac - else: - effective_delta = max_delta_per_step - - delta = action - commanded_pos - clamped_delta = np.clip(delta, -effective_delta, effective_delta) - commanded_pos = (commanded_pos + clamped_delta).astype(np.float32) - robot.send_action(action_to_robot_dict(commanded_pos)) - - # 5. Timing - elapsed = time.perf_counter() - loop_start - sleep_time = goal_time - elapsed - if sleep_time > 0: - time.sleep(sleep_time) - - step += 1 - if step % 100 == 0: - actual_hz = 1.0 / max(time.perf_counter() - loop_start, 1e-6) - logger.info( - "[Control] Step %d | %.1f Hz | queue=%d | from_queue=%d | holds=%d | inferences=%d", - step, actual_hz, queue_mixer.remaining, - actions_from_queue, hold_count, inference_thread.inference_count, - ) - - except KeyboardInterrupt: - logger.info("[Control] Interrupted by user") - finally: - inference_thread.stop() - for name, cam in cameras.items(): - cam.disconnect() - logger.info("Camera '%s' disconnected", name) - robot.disconnect() - logger.info( - "[Control] Cleanup complete — %d steps, %d actions from queue, %d holds, %d inferences", - step, actions_from_queue, hold_count, inference_thread.inference_count, - ) - - -if __name__ == "__main__": - main(parse_args()) From 4368f33e046158ea800df2fc530a107200e56b73 Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 13:31:35 +0200 Subject: [PATCH 07/19] add example script --- examples/runtime/async_inference.py | 128 ++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 examples/runtime/async_inference.py diff --git a/examples/runtime/async_inference.py b/examples/runtime/async_inference.py new file mode 100644 index 0000000..834fe00 --- /dev/null +++ b/examples/runtime/async_inference.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Async inference with PolicyRuntime. + +python examples/runtime/async_inference.py \ + --model ./exports/pi05_cans_openvino \ + --device GPU.0 \ + --port /dev/ttyACM0 \ + --calibration /home/max/.cache/physicalai/robots/a8d8d997-a59e-4423-9006-5d991d223887/calibrations/0b2f185a-8ab2-4956-91c2-3a2ac2dbd8c1.json \ + --overhead-camera /dev/v4l/by-id/usb-UGREEN_Camera_2K_UGREEN_Camera_2K_SN0001-video-index0 \ + --arm-camera 353322271391 \ + --front-camera /dev/v4l/by-id/usb-Innomaker_Innomaker-U20CAM-1080p-S1_SN0001-video-index0 \ + --width 640 \ + --height 480 \ + --fps 30 \ + --duration-s 60 +""" + +from __future__ import annotations + +import argparse +from typing import Any + +import openvino as ov +import numpy as np +from numpy import core + +from physicalai.capture import Frame, discover_all +from physicalai.capture.transport import SharedCamera +from physicalai.inference import InferenceModel +from physicalai.robot import SO101 +from physicalai.robot.interface import RobotObservation +from physicalai.runtime import ( + ActionQueue, + AsyncExecution, + LerpSmoother, + PolicyRuntime, +) + + +def obs_to_input( + robot_obs: RobotObservation, + camera_frames: dict[str, Frame], +) -> dict[str, Any]: + """Convert robot observation + camera frames to model input dict.""" + state = np.asarray(robot_obs.joint_positions, dtype=np.float32) + if state.ndim == 1: + state = state[np.newaxis] # (state_dim,) -> (1, state_dim) + model_input: dict[str, Any] = { + "state": state, + } + for name, frame in camera_frames.items(): + # (H, W, C) -> (1, C, H, W) float32 [0, 1] + img = frame.data.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 + model_input[f"images.{name}"] = img + return model_input + + +def main(): + parser = argparse.ArgumentParser(description="Run policy with PolicyRuntime") + parser.add_argument("--model", required=True, help="Exported model directory") + parser.add_argument("--device", default="GPU.0", help="OpenVINO device") + parser.add_argument("--port", default="/dev/ttyACM0", help="Robot serial port") + parser.add_argument("--calibration", required=True, help="Robot calibration file") + parser.add_argument("--overhead-camera", required=True, help="Overhead camera device path") + parser.add_argument("--arm-camera", required=True, help="Arm camera serial number") + parser.add_argument("--front-camera", required=True, help="Front camera device path") + parser.add_argument("--width", type=int, default=640) + parser.add_argument("--height", type=int, default=480) + parser.add_argument("--duration-s", type=float, default=60.0) + parser.add_argument("--fps", type=float, default=30.0) + args = parser.parse_args() + + import openvino_tokenizers # noqa: F401 — registers OV tokenizer ops + + print(f"Available devices:") + core = ov.Core() + devices = core.available_devices + for dev in devices: + print(f" {dev}: {core.get_property(dev, 'FULL_DEVICE_NAME')}") + print(f"Selected device: {args.device}") + + model = InferenceModel.load(args.model, device=args.device) + robot = SO101(port=args.port, calibration=args.calibration, role="follower") + cameras = { + "overhead": SharedCamera("uvc", device=args.overhead_camera, width=args.width, height=args.height, fps=int(args.fps)), + "front": SharedCamera("uvc", device=args.front_camera, width=args.width, height=args.height, fps=int(args.fps)), + "arm": SharedCamera("realsense", serial_number=args.arm_camera, width=args.width, height=args.height, fps=int(args.fps)), + } + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=AsyncExecution(threshold=0.3, fps=int(args.fps)), + action_queue=ActionQueue(smoother=LerpSmoother(duration_frames=5)), + cameras=cameras, + fps=args.fps, + obs_to_input=obs_to_input, + ) + + robot.connect() + for name, cam in cameras.items(): + try: + cam.connect() + except Exception as e: + print(f"Failed to connect camera '{name}': {e}") + print("Available cameras:") + for c in discover_all(): + print(f"Driver: {c.driver}, Device: {c.device}, Info: {c.info}") + return + print(f"Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps") + + print("Starting policy runtime...") + try: + stats = runtime.run(duration_s=args.duration_s) + print(f"\nDone — {stats.steps} steps, {stats.inference_count} inferences, {stats.total_holds} holds") + finally: + for name, cam in cameras.items(): + cam.disconnect() + print(f"Camera '{name}' disconnected") + robot.disconnect() + print("Robot disconnected") + + +if __name__ == "__main__": + main() From ed9fda70386629c4974580719ae18d2c554193ca Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 16:15:06 +0200 Subject: [PATCH 08/19] replace ActionCursor with deque, return 1D and 2D shape for select_action() and predict_action_chunk() --- src/physicalai/inference/model.py | 27 ++--- src/physicalai/inference/utils/__init__.py | 8 -- .../inference/utils/action_cursor.py | 90 --------------- tests/unit/inference/test_action_cursor.py | 107 ------------------ 4 files changed, 14 insertions(+), 218 deletions(-) delete mode 100644 src/physicalai/inference/utils/__init__.py delete mode 100644 src/physicalai/inference/utils/action_cursor.py delete mode 100644 tests/unit/inference/test_action_cursor.py diff --git a/src/physicalai/inference/model.py b/src/physicalai/inference/model.py index 39f4fc4..8d943a9 100644 --- a/src/physicalai/inference/model.py +++ b/src/physicalai/inference/model.py @@ -7,9 +7,11 @@ import json import warnings +from collections import deque from pathlib import Path from typing import TYPE_CHECKING, Any, Self +import numpy as np import yaml from physicalai.inference.adapters import adapter_registry, get_adapter @@ -17,11 +19,8 @@ from physicalai.inference.constants import ACTION from physicalai.inference.manifest import ComponentSpec, Manifest from physicalai.inference.runners import get_runner -from physicalai.inference.utils import ActionCursor if TYPE_CHECKING: - import numpy as np - from physicalai.inference.adapters.base import RuntimeAdapter from physicalai.inference.callbacks.base import Callback from physicalai.inference.postprocessors.base import Postprocessor @@ -125,7 +124,7 @@ def __init__( for callback in self.callbacks: callback.on_load(self) - self.cursor = ActionCursor() + self._action_buffer: deque[np.ndarray] = deque() @property def chunk_size(self) -> int: @@ -207,31 +206,33 @@ def select_action(self, observation: dict[str, np.ndarray]) -> np.ndarray: observation: Observation dict mapping names to numpy arrays. Returns: - Action array to execute. + 1-D action vector with shape ``(action_dim,)``. Examples: >>> obs = env.reset() >>> action = policy.select_action(obs) >>> next_obs, reward, done = env.step(action) """ - if self.cursor.empty: - self.cursor.push_chunk(self(observation)[ACTION]) - - return self.cursor.pop() + if not self._action_buffer: + self._action_buffer.extend(self.predict_action_chunk(observation)) + return self._action_buffer.popleft() def predict_action_chunk(self, observation: dict[str, np.ndarray]) -> np.ndarray: """Predict a chunk of actions for the given observation. - Delegates to ``__call__`` and extracts the ``"action_chunk"`` key. + Delegates to ``__call__`` and extracts the ``"action"`` key. Args: observation: Observation dict mapping names to numpy arrays. Returns: - Chunk of actions to execute. + 2-D action chunk with shape ``(chunk_size, action_dim)``. """ outputs = self(observation) - return outputs[ACTION] + actions = np.asarray(outputs[ACTION]) + # Drop the adapter's batch dimension so the return value matches + # the documented contract: a 2-D (chunk, action_dim) sequence. + return np.atleast_2d(np.squeeze(actions)) def reset(self) -> None: """Reset policy state for new episode. @@ -250,7 +251,7 @@ def reset(self) -> None: ... obs, reward, done = env.step(action) """ self.runner.reset() - self.cursor.reset() + self._action_buffer.clear() for callback in self.callbacks: callback.on_reset() diff --git a/src/physicalai/inference/utils/__init__.py b/src/physicalai/inference/utils/__init__.py deleted file mode 100644 index c576e49..0000000 --- a/src/physicalai/inference/utils/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) 2026 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""Inference utilities.""" - -from physicalai.inference.utils.action_cursor import ActionCursor - -__all__ = ["ActionCursor"] diff --git a/src/physicalai/inference/utils/action_cursor.py b/src/physicalai/inference/utils/action_cursor.py deleted file mode 100644 index a219124..0000000 --- a/src/physicalai/inference/utils/action_cursor.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (C) 2026 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""ActionCursor: action-chunk buffer for temporal action dispensing.""" - -from __future__ import annotations - -from collections import deque - -import numpy as np - - -class ActionCursor: - """Buffer that queues an action chunk and dispenses one timestep per call. - - Call :meth:`push_chunk` with the full action output from the runner - (shape ``(batch, T, action_dim)``), then call :meth:`pop` repeatedly - to retrieve individual timestep actions (shape ``(batch, action_dim)``). - When the buffer is exhausted, :attr:`empty` is ``True`` and a new - chunk should be pushed. - - Examples: - >>> cursor = ActionCursor() - >>> cursor.empty - True - >>> chunk = np.random.randn(1, 10, 7) # batch=1, T=10, action_dim=7 - >>> cursor.push_chunk(chunk) - >>> cursor.empty - False - >>> action = cursor.pop() # shape (1, 7) - >>> cursor.reset() - >>> cursor.empty - True - """ - - def __init__(self) -> None: - """Initialize an empty ActionCursor with no buffered actions.""" - self._queue: deque[np.ndarray] = deque() - - @property - def empty(self) -> bool: - """True when there are no buffered actions remaining.""" - return len(self._queue) == 0 - - def push_chunk(self, chunk: np.ndarray) -> None: - """Queue all timestep slices from an action chunk. - - Args: - chunk: Action array with shape ``(batch, T, action_dim)`` or - ``(T, action_dim)``. Each of the ``T`` timestep slices is - enqueued individually. - - Raises: - ValueError: If ``chunk.ndim`` is not 2 or 3. - """ - min_batched_action_dim = 2 - batched_temporal_dim = 3 - if chunk.ndim not in {min_batched_action_dim, batched_temporal_dim}: - msg = f"Chunk must be a 2-D or 3-D array, got ndim={chunk.ndim}." - raise ValueError(msg) - - if chunk.ndim == min_batched_action_dim: - # (T, action_dim) - no batch dimension - self._queue.extend(chunk) - else: - # (batch, T, action_dim) - transpose to (T, batch, action_dim) - self._queue.extend(np.transpose(chunk, (1, 0, 2))) - - def pop(self) -> np.ndarray: - """Return and remove the next buffered action. - - Returns: - Action array for the current timestep, shape ``(batch, action_dim)`` - or ``(action_dim,)`` depending on what was pushed. - - Raises: - IndexError: If the queue is empty. - """ - if self.empty: - msg = "ActionCursor is empty; call push_chunk before pop." - raise IndexError(msg) - return self._queue.popleft() - - def reset(self) -> None: - """Clear all buffered actions.""" - self._queue.clear() - - def __repr__(self) -> str: - """Return string representation.""" - return f"ActionCursor(buffered={len(self._queue)})" diff --git a/tests/unit/inference/test_action_cursor.py b/tests/unit/inference/test_action_cursor.py deleted file mode 100644 index 3a82b87..0000000 --- a/tests/unit/inference/test_action_cursor.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (C) 2026 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import numpy as np -import pytest - -from physicalai.inference.utils.action_cursor import ActionCursor - - -class TestActionCursor: - def test_init_starts_empty_and_repr(self) -> None: - cursor = ActionCursor() - assert cursor.empty is True - assert repr(cursor) == "ActionCursor(buffered=0)" - - def test_push_chunk_2d_pops_in_row_order(self) -> None: - cursor = ActionCursor() - chunk = np.array( - [ - [1.0, 2.0], - [3.0, 4.0], - [5.0, 6.0], - ], - dtype=np.float32, - ) - - cursor.push_chunk(chunk) - - np.testing.assert_array_equal(cursor.pop(), np.array([1.0, 2.0], dtype=np.float32)) - np.testing.assert_array_equal(cursor.pop(), np.array([3.0, 4.0], dtype=np.float32)) - np.testing.assert_array_equal(cursor.pop(), np.array([5.0, 6.0], dtype=np.float32)) - assert cursor.empty is True - - def test_push_chunk_3d_pops_time_slices(self) -> None: - cursor = ActionCursor() - chunk = np.array( - [ - [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]], - [[4.0, 40.0], [5.0, 50.0], [6.0, 60.0]], - ], - dtype=np.float32, - ) - - cursor.push_chunk(chunk) - - np.testing.assert_array_equal( - cursor.pop(), - np.array([[1.0, 10.0], [4.0, 40.0]], dtype=np.float32), - ) - np.testing.assert_array_equal( - cursor.pop(), - np.array([[2.0, 20.0], [5.0, 50.0]], dtype=np.float32), - ) - np.testing.assert_array_equal( - cursor.pop(), - np.array([[3.0, 30.0], [6.0, 60.0]], dtype=np.float32), - ) - assert cursor.empty is True - - def test_pop_empty_raises_index_error(self) -> None: - cursor = ActionCursor() - with pytest.raises(IndexError, match="ActionCursor is empty; call push_chunk before pop"): - cursor.pop() - - def test_reset_clears_buffered_actions(self) -> None: - cursor = ActionCursor() - cursor.push_chunk(np.array([[1.0], [2.0]], dtype=np.float32)) - - cursor.reset() - - assert cursor.empty is True - with pytest.raises(IndexError): - cursor.pop() - - def test_multiple_pushes_append_in_fifo_order(self) -> None: - cursor = ActionCursor() - first = np.array([[1.0, 1.0], [2.0, 2.0]], dtype=np.float32) - second = np.array( - [ - [[3.0, 3.0], [4.0, 4.0]], - ], - dtype=np.float32, - ) - - cursor.push_chunk(first) - cursor.push_chunk(second) - - np.testing.assert_array_equal(cursor.pop(), np.array([1.0, 1.0], dtype=np.float32)) - np.testing.assert_array_equal(cursor.pop(), np.array([2.0, 2.0], dtype=np.float32)) - np.testing.assert_array_equal(cursor.pop(), np.array([[3.0, 3.0]], dtype=np.float32)) - np.testing.assert_array_equal(cursor.pop(), np.array([[4.0, 4.0]], dtype=np.float32)) - assert cursor.empty is True - - @pytest.mark.parametrize( - "chunk", - [ - np.array([1.0, 2.0], dtype=np.float32), - np.zeros((1, 2, 3, 4), dtype=np.float32), - ], - ) - def test_push_chunk_invalid_ndim_raises_value_error(self, chunk: np.ndarray) -> None: - cursor = ActionCursor() - - with pytest.raises(ValueError, match=r"Chunk must be a 2-D or 3-D array, got ndim="): - cursor.push_chunk(chunk) From cdb3fab2b81e2324ea58bd67cba5e6c20c0937c0 Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 22:10:08 +0200 Subject: [PATCH 09/19] remove obs_to_input, move inside policy runtime --- examples/runtime/async_inference.py | 24 +-------- .../inference/preprocessors/pi05.py | 8 +-- src/physicalai/runtime/__init__.py | 3 -- src/physicalai/runtime/runtime.py | 45 ++++++---------- tests/unit/runtime/test_runtime.py | 54 +------------------ 5 files changed, 22 insertions(+), 112 deletions(-) diff --git a/examples/runtime/async_inference.py b/examples/runtime/async_inference.py index 834fe00..b2e9bb8 100644 --- a/examples/runtime/async_inference.py +++ b/examples/runtime/async_inference.py @@ -21,17 +21,14 @@ from __future__ import annotations import argparse -from typing import Any import openvino as ov import numpy as np -from numpy import core -from physicalai.capture import Frame, discover_all +from physicalai.capture import discover_all from physicalai.capture.transport import SharedCamera from physicalai.inference import InferenceModel from physicalai.robot import SO101 -from physicalai.robot.interface import RobotObservation from physicalai.runtime import ( ActionQueue, AsyncExecution, @@ -40,24 +37,6 @@ ) -def obs_to_input( - robot_obs: RobotObservation, - camera_frames: dict[str, Frame], -) -> dict[str, Any]: - """Convert robot observation + camera frames to model input dict.""" - state = np.asarray(robot_obs.joint_positions, dtype=np.float32) - if state.ndim == 1: - state = state[np.newaxis] # (state_dim,) -> (1, state_dim) - model_input: dict[str, Any] = { - "state": state, - } - for name, frame in camera_frames.items(): - # (H, W, C) -> (1, C, H, W) float32 [0, 1] - img = frame.data.transpose(2, 0, 1).astype(np.float32)[np.newaxis] / 255.0 - model_input[f"images.{name}"] = img - return model_input - - def main(): parser = argparse.ArgumentParser(description="Run policy with PolicyRuntime") parser.add_argument("--model", required=True, help="Exported model directory") @@ -97,7 +76,6 @@ def main(): action_queue=ActionQueue(smoother=LerpSmoother(duration_frames=5)), cameras=cameras, fps=args.fps, - obs_to_input=obs_to_input, ) robot.connect() diff --git a/src/physicalai/inference/preprocessors/pi05.py b/src/physicalai/inference/preprocessors/pi05.py index 0d5c68e..c3d631f 100644 --- a/src/physicalai/inference/preprocessors/pi05.py +++ b/src/physicalai/inference/preprocessors/pi05.py @@ -135,7 +135,9 @@ def _preprocess_images( if img.ndim == max_image_dim: img = img[:, -1, :, :, :] - if img.dtype != np.float32: + if img.dtype == np.uint8: + img = img.astype(np.float32) / 255.0 + elif img.dtype != np.float32: img = img.astype(np.float32) # Detect layout: assume channels-first when dim-1 == 3 @@ -152,8 +154,8 @@ def _preprocess_images( # [0, 1] -> [-1, 1] img = img * 2.0 - 1.0 - if channels_first: - img = np.transpose(img, (0, 3, 1, 2)) # -> (B, C, H, W) + # Output is always (B, C, H, W) — img is HWC at this point + img = np.transpose(img, (0, 3, 1, 2)) bsize = img.shape[0] mask = np.ones(bsize, dtype=np.bool_) diff --git a/src/physicalai/runtime/__init__.py b/src/physicalai/runtime/__init__.py index 4be467a..bbc41d0 100644 --- a/src/physicalai/runtime/__init__.py +++ b/src/physicalai/runtime/__init__.py @@ -9,7 +9,6 @@ from physicalai.runtime import SyncExecution, AsyncExecution, Execution, WorkerDiedError from physicalai.runtime import ActionQueue from physicalai.runtime import ChunkSmoother, LerpSmoother, ReplaceSmoother - from physicalai.runtime import default_observation_to_input """ from physicalai.runtime._action_queue import ActionQueue # noqa: PLC2701 @@ -23,7 +22,6 @@ PolicyRuntime, RunStats, RuntimeCallback, - default_observation_to_input, ) from physicalai.runtime.smoothers import ChunkSmoother, LerpSmoother, ReplaceSmoother @@ -39,5 +37,4 @@ "RuntimeCallback", "SyncExecution", "WorkerDiedError", - "default_observation_to_input", ] diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index b0b7e2b..b947a48 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -17,42 +17,18 @@ from physicalai.runtime.smoothers import LerpSmoother if TYPE_CHECKING: - from collections.abc import Callable, Mapping, Sequence + from collections.abc import Mapping, Sequence from physicalai.capture.camera import Camera from physicalai.capture.frame import Frame from physicalai.inference.model import InferenceModel - from physicalai.robot.interface import Robot, RobotObservation + from physicalai.robot.interface import Robot logger = logging.getLogger(__name__) _DEFAULT_LERP_FRAMES = 5 -def default_observation_to_input( - robot_obs: RobotObservation, - camera_frames: dict[str, Frame], -) -> dict[str, Any]: - """Convert robot observation and camera frames to model input dict. - - Maps: - - ``robot_obs.joint_positions`` → ``"state"`` (as batch dim) - - ``frame.data`` per camera → ``"images.{name}"`` - - Returns: - Model input dictionary. - """ - model_input: dict[str, Any] = {} - - if robot_obs.joint_positions is not None: - model_input["state"] = np.array([robot_obs.joint_positions], dtype=np.float32) - - for name, frame in camera_frames.items(): - model_input[f"images.{name}"] = frame.data - - return model_input - - class RuntimeCallback(Protocol): """Optional hook points in the PolicyRuntime control loop.""" @@ -96,7 +72,6 @@ def __init__( # noqa: D107 fps: float, cameras: Mapping[str, Camera] | None = None, action_queue: ActionQueue | None = None, - obs_to_input: Callable[[RobotObservation, dict[str, Frame]], dict[str, Any]] | None = None, callbacks: Sequence[RuntimeCallback] = (), ) -> None: if fps <= 0: @@ -108,7 +83,6 @@ def __init__( # noqa: D107 self._fps = fps self._cameras: Mapping[str, Camera] = cameras or {} self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=_DEFAULT_LERP_FRAMES)) - self._obs_to_input = obs_to_input or default_observation_to_input self._callbacks = list(callbacks) self._goal_time = (1.0 / fps) * 3 @@ -194,8 +168,19 @@ def run(self, *, duration_s: float | None = None) -> RunStats: def _build_model_input(self) -> dict[str, Any]: robot_obs = self._robot.get_observation() - camera_frames = {name: cam.read_latest() for name, cam in self._cameras.items()} - return self._obs_to_input(robot_obs, camera_frames) + model_input: dict[str, Any] = {} + + if robot_obs.joint_positions is not None: + model_input["state"] = np.array([robot_obs.joint_positions], dtype=np.float32) + + # Merge robot-embedded images and external cameras + if robot_obs.images: + for name, frame in robot_obs.images.items(): + model_input[f"images.{name}"] = frame.data[np.newaxis] + for name, cam in self._cameras.items(): + model_input[f"images.{name}"] = cam.read_latest().data[np.newaxis] + + return model_input def _shutdown(self, step: int) -> None: self._execution.stop() diff --git a/tests/unit/runtime/test_runtime.py b/tests/unit/runtime/test_runtime.py index 6f1b378..ccb01c0 100644 --- a/tests/unit/runtime/test_runtime.py +++ b/tests/unit/runtime/test_runtime.py @@ -16,7 +16,7 @@ from physicalai.runtime._action_queue import ActionQueue from physicalai.runtime.execution import SyncExecution, WorkerDiedError -from physicalai.runtime.runtime import PolicyRuntime, RunStats, default_observation_to_input +from physicalai.runtime.runtime import PolicyRuntime, RunStats from physicalai.capture import Frame @@ -235,55 +235,3 @@ def test_fields_populated(self) -> None: assert stats.total_pops == 8 assert stats.total_holds == 2 assert stats.inference_count == 3 - - -class TestDefaultObservationToInput: - def test_extracts_joint_positions(self) -> None: - positions = np.array([0.1, 0.2, 0.3], dtype=np.float32) - obs = FakeRobotObservation( - joint_positions=positions, - timestamp=0.0, - sensor_data=None, - images=None, - ) - - result = default_observation_to_input(obs, {}) - - assert "state" in result - np.testing.assert_array_equal(result["state"], np.array([positions], dtype=np.float32)) - - def test_extracts_frame_data(self) -> None: - obs = FakeRobotObservation( - joint_positions=np.zeros(3), - timestamp=0.0, - sensor_data=None, - images=None, - ) - frame = Frame(data=np.zeros((480, 640, 3), dtype=np.uint8), timestamp=0.0, sequence=0) - - result = default_observation_to_input(obs, camera_frames={"cam0": frame}) - - assert "images.cam0" in result - np.testing.assert_array_equal(result["images.cam0"], frame.data) - - def test_custom_obs_to_input(self) -> None: - robot = _make_mock_robot() - model = _make_mock_model(chunk_size=10) - execution = SyncExecution() - - custom_fn = MagicMock(return_value={"custom_key": np.zeros(3)}) - - runtime = PolicyRuntime( - robot=robot, - model=model, - execution=execution, - fps=10.0, - obs_to_input=custom_fn, - ) - - with patch("physicalai.runtime.runtime.time") as mock_time: - mock_time.perf_counter.return_value = 0.0 - mock_time.sleep = MagicMock() - runtime.run(duration_s=0.1) - - assert custom_fn.call_count >= 2 From 41003bb99955a52e3c56dcca0aa0e5c7a1505bd8 Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 22:51:09 +0200 Subject: [PATCH 10/19] Address comment: runtime manages robot/camera lifecycle --- examples/runtime/async_inference.py | 25 +++++----- src/physicalai/runtime/runtime.py | 72 ++++++++++++++++++++++++++++- tests/unit/runtime/test_runtime.py | 36 ++++++++++++--- 3 files changed, 110 insertions(+), 23 deletions(-) diff --git a/examples/runtime/async_inference.py b/examples/runtime/async_inference.py index b2e9bb8..006496c 100644 --- a/examples/runtime/async_inference.py +++ b/examples/runtime/async_inference.py @@ -78,16 +78,16 @@ def main(): fps=args.fps, ) - robot.connect() + try: + runtime.connect() + except Exception as e: + print(f"Failed to connect: {e}") + print("Available cameras:") + for c in discover_all(): + print(f" Driver: {c.driver}, Device: {c.device}, Info: {c.info}") + return + for name, cam in cameras.items(): - try: - cam.connect() - except Exception as e: - print(f"Failed to connect camera '{name}': {e}") - print("Available cameras:") - for c in discover_all(): - print(f"Driver: {c.driver}, Device: {c.device}, Info: {c.info}") - return print(f"Camera '{name}' connected: {cam.actual_width}x{cam.actual_height} @ {cam.actual_fps}fps") print("Starting policy runtime...") @@ -95,11 +95,8 @@ def main(): stats = runtime.run(duration_s=args.duration_s) print(f"\nDone — {stats.steps} steps, {stats.inference_count} inferences, {stats.total_holds} holds") finally: - for name, cam in cameras.items(): - cam.disconnect() - print(f"Camera '{name}' disconnected") - robot.disconnect() - print("Robot disconnected") + runtime.disconnect() + print("Disconnected") if __name__ == "__main__": diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index b947a48..42c1df6 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -8,7 +8,7 @@ import logging import time from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Any, Protocol, Self import numpy as np @@ -61,7 +61,11 @@ class PolicyRuntime: """Runs a policy on robot hardware. Loop: observe → maybe_request → pop → send → sleep. - Robot and cameras must be connected before run(). Caller owns lifecycle. + + Supports context manager for safe lifecycle management:: + + with PolicyRuntime(robot=robot, model=model, ...) as runtime: + stats = runtime.run(duration_s=60.0) """ def __init__( # noqa: D107 @@ -85,6 +89,66 @@ def __init__( # noqa: D107 self._action_queue = action_queue or ActionQueue(smoother=LerpSmoother(duration_frames=_DEFAULT_LERP_FRAMES)) self._callbacks = list(callbacks) self._goal_time = (1.0 / fps) * 3 + self._connected = False + + def connect(self) -> None: + """Connect robot and cameras. + + Connects robot first, then cameras in dict order. On failure, + disconnects everything already connected and re-raises. + + Idempotent — calling on an already-connected runtime is a no-op. + """ + if self._connected: + logger.debug("connect() called but already connected — no-op") + return + + self._robot.connect() + connected_cameras: list[str] = [] + try: + for name, cam in self._cameras.items(): + cam.connect() + connected_cameras.append(name) + except Exception: + for cam_name in connected_cameras: + try: + self._cameras[cam_name].disconnect() + except Exception: + logger.warning("Failed to disconnect camera '%s' during rollback", cam_name, exc_info=True) + try: + self._robot.disconnect() + except Exception: + logger.warning("Failed to disconnect robot during rollback", exc_info=True) + raise + + self._connected = True + + def disconnect(self) -> None: + """Disconnect cameras then robot. Never raises. + + Idempotent — calling on an already-disconnected runtime is a no-op. + """ + if not self._connected: + return + + for name, cam in self._cameras.items(): + try: + cam.disconnect() + except Exception: + logger.warning("Failed to disconnect camera '%s'", name, exc_info=True) + try: + self._robot.disconnect() + except Exception: + logger.warning("Failed to disconnect robot", exc_info=True) + + self._connected = False + + def __enter__(self) -> Self: + self.connect() + return self + + def __exit__(self, *exc_info: object) -> None: + self.disconnect() def run(self, *, duration_s: float | None = None) -> RunStats: """Run the control loop. @@ -96,8 +160,12 @@ def run(self, *, duration_s: float | None = None) -> RunStats: Statistics from the run session. Raises: + RuntimeError: If called before connect(). WorkerDiedError: If the inference worker thread dies. """ + if not self._connected: + msg = "PolicyRuntime.run() called before connect(). Use 'with runtime:' or call runtime.connect() first." + raise RuntimeError(msg) self._execution.start(self._model, self._action_queue) sample_obs = self._build_model_input() self._execution.warmup(sample_obs) diff --git a/tests/unit/runtime/test_runtime.py b/tests/unit/runtime/test_runtime.py index ccb01c0..839adb2 100644 --- a/tests/unit/runtime/test_runtime.py +++ b/tests/unit/runtime/test_runtime.py @@ -48,6 +48,13 @@ def _make_mock_model(chunk_size: int = 4, action_dim: int = 3) -> MagicMock: return model +def _make_runtime(**kwargs: Any) -> PolicyRuntime: + """Create a PolicyRuntime with _connected=True for testing.""" + runtime = PolicyRuntime(**kwargs) + runtime._connected = True # noqa: SLF001 + return runtime + + def _exhaustible_side_effect( initial_chunks: list[np.ndarray], action_dim: int = 2, @@ -69,7 +76,7 @@ def test_full_loop_with_duration(self) -> None: execution = SyncExecution() queue = ActionQueue() - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -94,7 +101,7 @@ def test_hold_fallback_when_queue_empty(self) -> None: execution = SyncExecution() queue = ActionQueue() - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -123,7 +130,7 @@ def test_worker_died_error_propagation(self) -> None: queue = ActionQueue() queue.push_chunk(np.random.randn(4, 3).astype(np.float32)) - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -141,7 +148,7 @@ def test_shutdown_does_not_disconnect(self) -> None: model = _make_mock_model() execution = SyncExecution() - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -155,6 +162,21 @@ def test_shutdown_does_not_disconnect(self) -> None: robot.disconnect.assert_not_called() + def test_run_raises_if_not_connected(self) -> None: + robot = _make_mock_robot() + model = _make_mock_model() + execution = SyncExecution() + + runtime = PolicyRuntime( + robot=robot, + model=model, + execution=execution, + fps=10.0, + ) + + with pytest.raises(RuntimeError, match="connect"): + runtime.run(duration_s=1.0) + class TestRuntimeCallback: def test_before_send_action_called(self) -> None: @@ -164,7 +186,7 @@ def test_before_send_action_called(self) -> None: callback = MagicMock() callback.before_send_action.return_value = None - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -186,7 +208,7 @@ def test_callback_raises_does_not_crash_loop(self) -> None: bad_callback = MagicMock() bad_callback.before_send_action.side_effect = RuntimeError("oops") - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, @@ -212,7 +234,7 @@ def test_on_hold_called_when_queue_empty(self) -> None: callback.before_send_action.return_value = None callback.on_hold.return_value = None - runtime = PolicyRuntime( + runtime = _make_runtime( robot=robot, model=model, execution=execution, From 93696bbbea7d05309a2d5d737d545b1bc4c269ba Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Tue, 19 May 2026 23:09:13 +0200 Subject: [PATCH 11/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/physicalai/runtime/runtime.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index 42c1df6..910bf2d 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -192,12 +192,14 @@ def run(self, *, duration_s: float | None = None) -> RunStats: holds = self._action_queue.consecutive_holds if holds == 1: logger.warning("Queue empty — holding position") - elif self._fps > 0 and holds % int(self._fps) == 0: - logger.warning( - "Queue starvation: %d consecutive holds (%.1fs)", - holds, - holds / self._fps, - ) + elif self._fps > 0: + warning_interval = max(int(self._fps), 1) + if holds % warning_interval == 0: + logger.warning( + "Queue starvation: %d consecutive holds (%.1fs)", + holds, + holds / self._fps, + ) self._invoke_callback("on_hold", step=step, holds=holds) if action is None: From 90effbf6bf33a2f01ba81ce27d36b06cd88c7b82 Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Tue, 19 May 2026 23:09:46 +0200 Subject: [PATCH 12/19] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/physicalai/runtime/runtime.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index 910bf2d..2458701 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -204,6 +204,10 @@ def run(self, *, duration_s: float | None = None) -> RunStats: if action is None: logger.error("No action available (warmup may have failed)") + elapsed = time.perf_counter() - loop_start + sleep_time = goal_time - elapsed + if sleep_time > 0: + time.sleep(sleep_time) step += 1 continue From ac6c8e3d366b96c94b2b6064458692035bafd9bf Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 23:16:13 +0200 Subject: [PATCH 13/19] address copilot --- src/physicalai/runtime/runtime.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index 2458701..3e3ac7f 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -280,7 +280,11 @@ def _invoke_callback(self, method: str, **kwargs: Any) -> Any: # noqa: ANN401 fn = getattr(cb, method, None) if fn is not None: try: - result = fn(**kwargs) + callback_result = fn(**kwargs) + if callback_result is not None: + result = callback_result + if method == "before_send_action": + kwargs["action"] = callback_result except Exception: logger.exception("Callback %s.%s raised", type(cb).__name__, method) return result From 211fba489681d47910f828a09a6b3827b6477040 Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 23:19:19 +0200 Subject: [PATCH 14/19] fix ruff --- src/physicalai/runtime/runtime.py | 50 ++++++++++++++++--------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index 3e3ac7f..e8f9a80 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -20,7 +20,6 @@ from collections.abc import Mapping, Sequence from physicalai.capture.camera import Camera - from physicalai.capture.frame import Frame from physicalai.inference.model import InferenceModel from physicalai.robot.interface import Robot @@ -143,11 +142,11 @@ def disconnect(self) -> None: self._connected = False - def __enter__(self) -> Self: + def __enter__(self) -> Self: # noqa: D105 self.connect() return self - def __exit__(self, *exc_info: object) -> None: + def __exit__(self, *exc_info: object) -> None: # noqa: D105 self.disconnect() def run(self, *, duration_s: float | None = None) -> RunStats: @@ -189,25 +188,11 @@ def run(self, *, duration_s: float | None = None) -> RunStats: last_action = action else: action = last_action - holds = self._action_queue.consecutive_holds - if holds == 1: - logger.warning("Queue empty — holding position") - elif self._fps > 0: - warning_interval = max(int(self._fps), 1) - if holds % warning_interval == 0: - logger.warning( - "Queue starvation: %d consecutive holds (%.1fs)", - holds, - holds / self._fps, - ) - self._invoke_callback("on_hold", step=step, holds=holds) + self._handle_hold(step=step) if action is None: logger.error("No action available (warmup may have failed)") - elapsed = time.perf_counter() - loop_start - sleep_time = goal_time - elapsed - if sleep_time > 0: - time.sleep(sleep_time) + self._tick_sleep(loop_start, goal_time) step += 1 continue @@ -217,11 +202,7 @@ def run(self, *, duration_s: float | None = None) -> RunStats: self._robot.send_action(action, goal_time=self._goal_time) self._invoke_callback("on_action_sent", action=action, step=step) - - elapsed = time.perf_counter() - loop_start - sleep_time = goal_time - elapsed - if sleep_time > 0: - time.sleep(sleep_time) + self._tick_sleep(loop_start, goal_time) step += 1 @@ -240,6 +221,27 @@ def run(self, *, duration_s: float | None = None) -> RunStats: inference_count=getattr(self._execution, "inference_count", 0), ) + def _handle_hold(self, *, step: int) -> None: + holds = self._action_queue.consecutive_holds + if holds == 1: + logger.warning("Queue empty — holding position") + elif self._fps > 0: + warning_interval = max(int(self._fps), 1) + if holds % warning_interval == 0: + logger.warning( + "Queue starvation: %d consecutive holds (%.1fs)", + holds, + holds / self._fps, + ) + self._invoke_callback("on_hold", step=step, holds=holds) + + @staticmethod + def _tick_sleep(loop_start: float, goal_time: float) -> None: + elapsed = time.perf_counter() - loop_start + sleep_time = goal_time - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + def _build_model_input(self) -> dict[str, Any]: robot_obs = self._robot.get_observation() model_input: dict[str, Any] = {} From 3d05530ee93e77d2a92aeeafc02af4db4274e24f Mon Sep 17 00:00:00 2001 From: Max X Date: Tue, 19 May 2026 23:22:28 +0200 Subject: [PATCH 15/19] fix all linting issues --- examples/runtime/async_inference.py | 5 +++-- src/physicalai/robot/__init__.py | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/examples/runtime/async_inference.py b/examples/runtime/async_inference.py index 006496c..6b07efa 100644 --- a/examples/runtime/async_inference.py +++ b/examples/runtime/async_inference.py @@ -83,8 +83,9 @@ def main(): except Exception as e: print(f"Failed to connect: {e}") print("Available cameras:") - for c in discover_all(): - print(f" Driver: {c.driver}, Device: {c.device}, Info: {c.info}") + for driver, devices in discover_all().items(): + for dev in devices: + print(f" Driver: {driver}, Device: {dev.device_id}, Info: {dev.name}") return for name, cam in cameras.items(): diff --git a/src/physicalai/robot/__init__.py b/src/physicalai/robot/__init__.py index d2159e9..2c55631 100644 --- a/src/physicalai/robot/__init__.py +++ b/src/physicalai/robot/__init__.py @@ -13,10 +13,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from physicalai.robot.connect import connect from physicalai.robot.interface import Robot, RobotObservation from physicalai.robot.verify import verify_robot +if TYPE_CHECKING: + from physicalai.robot.so101 import SO101 as SO101 + from physicalai.robot.trossen import BimanualWidowXAI as BimanualWidowXAI + from physicalai.robot.trossen import WidowXAI as WidowXAI + __all__ = [ "Robot", "RobotObservation", From 51a892773aa710d53dc1b5bacee80015c122f656 Mon Sep 17 00:00:00 2001 From: Max Xiang Date: Wed, 20 May 2026 15:55:38 +0200 Subject: [PATCH 16/19] update copyright header --- src/physicalai/inference/__init__.py | 2 +- src/physicalai/inference/adapters/__init__.py | 2 +- src/physicalai/inference/adapters/_discovery.py | 2 +- src/physicalai/inference/adapters/openvino.py | 2 +- src/physicalai/inference/model.py | 2 +- src/physicalai/runtime/__init__.py | 2 +- src/physicalai/runtime/_action_queue.py | 2 +- src/physicalai/runtime/execution.py | 2 +- src/physicalai/runtime/runtime.py | 2 +- src/physicalai/runtime/smoothers.py | 2 +- tests/unit/inference/test_adapters.py | 2 +- tests/unit/inference/test_model.py | 2 +- tests/unit/runtime/test_runtime.py | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/physicalai/inference/__init__.py b/src/physicalai/inference/__init__.py index aacf56f..186b175 100644 --- a/src/physicalai/inference/__init__.py +++ b/src/physicalai/inference/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Production inference module for exported policies. diff --git a/src/physicalai/inference/adapters/__init__.py b/src/physicalai/inference/adapters/__init__.py index f3124c7..debc1d8 100644 --- a/src/physicalai/inference/adapters/__init__.py +++ b/src/physicalai/inference/adapters/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Inference adapters for different backend runtimes. diff --git a/src/physicalai/inference/adapters/_discovery.py b/src/physicalai/inference/adapters/_discovery.py index ed00a77..154c203 100644 --- a/src/physicalai/inference/adapters/_discovery.py +++ b/src/physicalai/inference/adapters/_discovery.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Adapter discovery and factory helpers.""" diff --git a/src/physicalai/inference/adapters/openvino.py b/src/physicalai/inference/adapters/openvino.py index 08a3ff4..81b5d2e 100644 --- a/src/physicalai/inference/adapters/openvino.py +++ b/src/physicalai/inference/adapters/openvino.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """OpenVINO adapter for inference.""" diff --git a/src/physicalai/inference/model.py b/src/physicalai/inference/model.py index 8d943a9..20948c1 100644 --- a/src/physicalai/inference/model.py +++ b/src/physicalai/inference/model.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Production-ready inference model with unified API.""" diff --git a/src/physicalai/runtime/__init__.py b/src/physicalai/runtime/__init__.py index bbc41d0..4a4ec70 100644 --- a/src/physicalai/runtime/__init__.py +++ b/src/physicalai/runtime/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Runtime system for running trained policies on robot hardware. diff --git a/src/physicalai/runtime/_action_queue.py b/src/physicalai/runtime/_action_queue.py index af768a6..c195edf 100644 --- a/src/physicalai/runtime/_action_queue.py +++ b/src/physicalai/runtime/_action_queue.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/src/physicalai/runtime/execution.py b/src/physicalai/runtime/execution.py index 28ec86c..bb2c854 100644 --- a/src/physicalai/runtime/execution.py +++ b/src/physicalai/runtime/execution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Execution strategies for scheduling policy inference.""" diff --git a/src/physicalai/runtime/runtime.py b/src/physicalai/runtime/runtime.py index e8f9a80..801daa9 100644 --- a/src/physicalai/runtime/runtime.py +++ b/src/physicalai/runtime/runtime.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """PolicyRuntime — runs a trained policy on robot hardware.""" diff --git a/src/physicalai/runtime/smoothers.py b/src/physicalai/runtime/smoothers.py index 70d0bd9..58b1d14 100644 --- a/src/physicalai/runtime/smoothers.py +++ b/src/physicalai/runtime/smoothers.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Chunk smoothers for runtime action queues.""" diff --git a/tests/unit/inference/test_adapters.py b/tests/unit/inference/test_adapters.py index f2be690..42efaba 100644 --- a/tests/unit/inference/test_adapters.py +++ b/tests/unit/inference/test_adapters.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Unit tests for inference adapters shipped with the core ``physicalai`` package. diff --git a/tests/unit/inference/test_model.py b/tests/unit/inference/test_model.py index 77e801b..b52c1de 100644 --- a/tests/unit/inference/test_model.py +++ b/tests/unit/inference/test_model.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Unit tests for InferenceModel and inference runners.""" diff --git a/tests/unit/runtime/test_runtime.py b/tests/unit/runtime/test_runtime.py index 839adb2..45b7bf8 100644 --- a/tests/unit/runtime/test_runtime.py +++ b/tests/unit/runtime/test_runtime.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025-2026 Intel Corporation +# Copyright (C) 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """Tests for physicalai.runtime.runtime.""" From 3c8b0a34ce73a1aefb106cede392c4590cd12f7a Mon Sep 17 00:00:00 2001 From: Max X Date: Thu, 21 May 2026 13:49:44 +0200 Subject: [PATCH 17/19] address PR comments --- src/physicalai/inference/model.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/physicalai/inference/model.py b/src/physicalai/inference/model.py index 20948c1..aeb4c7e 100644 --- a/src/physicalai/inference/model.py +++ b/src/physicalai/inference/model.py @@ -229,10 +229,17 @@ def predict_action_chunk(self, observation: dict[str, np.ndarray]) -> np.ndarray 2-D action chunk with shape ``(chunk_size, action_dim)``. """ outputs = self(observation) - actions = np.asarray(outputs[ACTION]) - # Drop the adapter's batch dimension so the return value matches - # the documented contract: a 2-D (chunk, action_dim) sequence. - return np.atleast_2d(np.squeeze(actions)) + actions = outputs[ACTION] + # Strip the batch dimension; reject actual batches (batch > 1). + if actions.ndim == 3: + if actions.shape[0] != 1: + msg = ( + f"Batched inference is not supported by predict_action_chunk: " + f"expected batch dimension of 1, got shape {actions.shape}" + ) + raise ValueError(msg) + actions = actions[0] + return np.atleast_2d(actions) def reset(self) -> None: """Reset policy state for new episode. From f2df02824865718ce1c2a07ebd3abc8f267e8698 Mon Sep 17 00:00:00 2001 From: Max X Date: Thu, 21 May 2026 13:57:51 +0200 Subject: [PATCH 18/19] fix style --- src/physicalai/inference/model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/physicalai/inference/model.py b/src/physicalai/inference/model.py index aeb4c7e..365f7b2 100644 --- a/src/physicalai/inference/model.py +++ b/src/physicalai/inference/model.py @@ -227,11 +227,14 @@ def predict_action_chunk(self, observation: dict[str, np.ndarray]) -> np.ndarray Returns: 2-D action chunk with shape ``(chunk_size, action_dim)``. + + Raises: + ValueError: If the output has a batch dimension greater than 1. """ outputs = self(observation) actions = outputs[ACTION] # Strip the batch dimension; reject actual batches (batch > 1). - if actions.ndim == 3: + if actions.ndim == 3: # noqa: N806 if actions.shape[0] != 1: msg = ( f"Batched inference is not supported by predict_action_chunk: " From fb12417ca372640e354439fb13c37178e6a8c425 Mon Sep 17 00:00:00 2001 From: Max X Date: Thu, 21 May 2026 13:58:55 +0200 Subject: [PATCH 19/19] fix style --- src/physicalai/inference/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/physicalai/inference/model.py b/src/physicalai/inference/model.py index 365f7b2..4964969 100644 --- a/src/physicalai/inference/model.py +++ b/src/physicalai/inference/model.py @@ -234,7 +234,7 @@ def predict_action_chunk(self, observation: dict[str, np.ndarray]) -> np.ndarray outputs = self(observation) actions = outputs[ACTION] # Strip the batch dimension; reject actual batches (batch > 1). - if actions.ndim == 3: # noqa: N806 + if actions.ndim == 3: # noqa: PLR2004 if actions.shape[0] != 1: msg = ( f"Batched inference is not supported by predict_action_chunk: "