Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/scope/core/pipelines/wan2_1/blocks/preprocess_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def inputs(self) -> list[InputParam]:
type_hint=list[torch.Tensor] | torch.Tensor | None,
description="Input frames for VACE conditioning",
),
InputParam(
"vace_input_masks",
default=None,
type_hint=torch.Tensor | None,
description="Spatial control masks [B, 1, F, H, W] for VACE conditioning",
),
InputParam(
"height",
required=True,
Expand Down Expand Up @@ -71,6 +77,11 @@ def intermediate_outputs(self) -> list[OutputParam]:
type_hint=torch.Tensor,
description="Input video to convert into noisy latents",
),
OutputParam(
"vace_input_masks",
type_hint=torch.Tensor,
description="Resampled VACE spatial control masks [B, 1, F, H, W]",
),
]

@torch.no_grad()
Expand Down Expand Up @@ -107,6 +118,34 @@ def __call__(self, components, state: PipelineState) -> tuple[Any, PipelineState
target_num_frames=target_num_frames,
)

# Resample vace_input_masks to match target_num_frames.
# On the first chunk (current_start_frame == 0), target_num_frames is one
# greater than the default chunk size, so masks arriving from the queue
# (or a client parameter) would otherwise be one frame short, causing a
# shape mismatch inside VaceEncodingBlock._encode_with_conditioning.
if block_state.vace_input_masks is not None:
masks = block_state.vace_input_masks
if isinstance(masks, list):
masks = (
torch.stack(masks, dim=2)
if masks[0].dim() == 4
else torch.stack(masks, dim=0)
)
mask_frames = masks.shape[2]
if mask_frames != target_num_frames:
indices = (
torch.linspace(
0,
mask_frames - 1,
target_num_frames,
device=masks.device,
)
.round()
.long()
)
masks = masks[:, :, indices]
block_state.vace_input_masks = masks

self.set_block_state(state, block_state)
return components, state

Expand Down
6 changes: 6 additions & 0 deletions src/scope/server/pipeline_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,10 @@ def process_chunk(self):
processing_time = time.time() - processing_start

if not output_dict:
# 1) Some pipelines return {} when idle
# 2) For those, prepare() is None, so we never wait on input queues.
# 3) Without this sleep the worker thread would busy-loop.
self.shutdown_event.wait(SLEEP_TIME)
return

# Pass audio to output queue regardless of whether video exists.
Expand All @@ -456,6 +460,8 @@ def process_chunk(self):
# Extract video from the returned dictionary
output = output_dict.get("video")
if output is None:
self.is_prepared = True
self._pending_cache_init = False
return

# Clear one-shot parameters after use to prevent sending them on subsequent chunks
Expand Down
Loading