Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/converter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,12 @@ impl ColorConverter {
///
/// # Returns
/// Returns `Ok(())` on success. The target_image is transitioned to VIDEO_ENCODE_SRC_KHR.
/// Convert an RGB source image to YUV, writing to the target image.
///
/// Submits the command buffer and waits synchronously on a fence before
/// returning. The caller is responsible for any further sync between
/// convert and downstream consumers (e.g. an encoder reading the target
/// image).
pub fn convert(
&mut self,
src_image: vk::Image,
Expand Down Expand Up @@ -858,7 +864,8 @@ impl ColorConverter {
.map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?;
}

// Submit and wait.
// Submit and wait synchronously on the fence — no semaphore overlap
// with the encoder; the caller is responsible for any further sync.
unsafe {
device
.reset_fences(&[self.fence])
Expand Down
134 changes: 90 additions & 44 deletions src/encoder/av1/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ impl AV1Encoder {
/// This image can be used as a target for `ColorConverter::convert` to avoid
/// an intermediate copy.
pub fn input_image(&self) -> vk::Image {
self.input_image
self.slots[self.current_slot].input_image
}

/// Encode a frame from a GPU image.
Expand All @@ -26,30 +26,68 @@ impl AV1Encoder {
/// The encoder will panic at creation time if B-frames are enabled (b_frame_count > 0),
/// as B-frame encoding is not yet supported.
pub fn encode(&mut self, src_image: vk::Image) -> Result<Vec<EncodedPacket>> {
let prev_packet = self.drain_current_slot()?;

let gop_position = self.gop.get_next_frame();
let display_order = self.input_frame_num;
self.input_frame_num += 1;

debug!(
"AV1 encode: frame {} from GPU image, type={:?}",
display_order, gop_position.frame_type
"AV1 encode: frame {} type={:?}, slot={}",
display_order, gop_position.frame_type, self.current_slot
);

// Upload from GPU image.
self.upload_from_image(src_image)?;
self.encode_current_frame(&gop_position, display_order)?;

// Encode immediately.
let packet = self.encode_current_frame(&gop_position, display_order)?;
self.current_slot = (self.current_slot + 1) % self.slots.len();
Ok(prev_packet.into_iter().collect())
}

Ok(vec![packet])
fn drain_current_slot(&mut self) -> Result<Option<EncodedPacket>> {
if !self.slots[self.current_slot].in_flight {
return Ok(None);
}
let bitstream = unsafe {
crate::encoder::resources::wait_and_read_bitstream(
self.context.device(),
self.slots[self.current_slot].encode_fence,
self.slots[self.current_slot].query_pool,
self.slots[self.current_slot].bitstream_buffer_ptr,
)?
};
self.slots[self.current_slot].in_flight = false;
let meta = self.slots[self.current_slot]
.pending_metadata
.take()
.ok_or_else(|| {
PixelForgeError::CommandBuffer(
"Drained slot has bitstream but no metadata; encoder state corrupted"
.to_string(),
)
})?;
// AV1 always prefixes a Temporal Delimiter OBU; key frames also need
// the sequence header captured at submit time.
let mut data = vec![0x12, 0x00];
if let Some(header) = meta.header {
data.extend_from_slice(&header);
}
data.extend_from_slice(&bitstream);
Ok(Some(EncodedPacket {
data,
frame_type: meta.frame_type,
is_key_frame: meta.is_key_frame,
pts: meta.pts,
dts: meta.dts,
}))
}

/// Internal method to encode the current frame already uploaded to input_image.
fn encode_current_frame(
&mut self,
gop_position: &GopPosition,
display_order: u64,
) -> Result<EncodedPacket> {
) -> Result<()> {
let is_key_frame =
gop_position.frame_type.is_idr() || gop_position.frame_type == GopFrameType::I;
let is_reference = gop_position.is_reference;
Expand All @@ -75,38 +113,41 @@ impl AV1Encoder {
}
}

let mut encoded_data = Vec::new();

// AV1 Temporal Delimiter OBU: type=2, has_size=1, size=0.
// Required as the first OBU in each temporal unit for conformant bitstreams.
// This enables ffmpeg's AV1 demuxer to detect frame boundaries in raw OBU streams.
encoded_data.extend_from_slice(&[0x12, 0x00]);

// For key frames, prepend the AV1 Sequence Header OBU.
// This is required for AV1 decoders to initialize (equivalent to H.265 VPS/SPS/PPS).
if is_key_frame {
// For key frames, capture the AV1 Sequence Header OBU to be prepended
// at drain time. (The Temporal Delimiter prefix is added in
// drain_current_slot for every frame.)
let header = if is_key_frame {
if self.header_data.is_none() {
let header = self.get_av1_sequence_header()?;
let h = self.get_av1_sequence_header()?;
debug!(
"AV1 sequence header ({} bytes): {:02X?}",
header.len(),
&header[..std::cmp::min(32, header.len())]
h.len(),
&h[..std::cmp::min(32, h.len())]
);
self.header_data = Some(header);
self.header_data = Some(h);
}
if let Some(ref header) = self.header_data {
encoded_data.extend_from_slice(header);
}
}
self.header_data.clone()
} else {
None
};

encoded_data.extend_from_slice(&self.encode_frame_internal(gop_position, is_key_frame)?);
// Submit the encode (no wait, no readback). Marks the slot in_flight.
self.encode_frame_internal(gop_position, is_key_frame)?;

// Save the order_hint used during encoding BEFORE incrementing.
let encoded_order_hint = self.order_hint;
let dts = self.encode_frame_num;
self.encode_frame_num += 1;
self.frame_num += 1;
self.order_hint = (self.order_hint + 1) & 0xFF; // 8-bit order hint

self.slots[self.current_slot].pending_metadata = Some(super::SlotPacketMetadata {
frame_type,
is_key_frame,
pts: display_order,
dts,
header,
});

// Only KEY frames are stored as references. P frames all reference the KEY frame
// and don't update any reference buffer, avoiding P→P which produces corrupt output
// on NVIDIA AV1 encoders.
Expand All @@ -131,19 +172,25 @@ impl AV1Encoder {
// P frames reuse the same scratch DPB slot (current_dpb_slot stays unchanged
// between P frames since it's always different from the KEY frame's slot).

Ok(EncodedPacket {
data: encoded_data,
frame_type,
is_key_frame,
pts: display_order,
dts: self.encode_frame_num - 1,
})
Ok(())
}

/// Flush the encoder and get any remaining packets.
/// Flush the encoder and drain any remaining in-flight slots.
pub fn flush(&mut self) -> Result<Vec<EncodedPacket>> {
// No buffered frames in the current implementation.
Ok(Vec::new())
let mut out = Vec::new();
for offset in 0..self.slots.len() {
let idx = (self.current_slot + offset) % self.slots.len();
if !self.slots[idx].in_flight {
continue;
}
let saved_current = self.current_slot;
self.current_slot = idx;
if let Some(packet) = self.drain_current_slot()? {
out.push(packet);
}
self.current_slot = saved_current;
}
Ok(out)
}

/// Request that the next frame be an IDR/key frame.
Expand Down Expand Up @@ -214,17 +261,16 @@ impl AV1Encoder {
/// containing the updated color configuration. The next encoded frame will
/// be a key frame with the new sequence header prepended.
pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> {
// Wait for any in-flight encode to complete before modifying session params.
// Do NOT reset the fence here — submit_encode_and_read_bitstream() resets it
// before queue_submit. Leaving the fence signaled allows consecutive
// set_color_description() calls without deadlock.
// Wait for ALL slot fences before modifying session params. Do NOT reset
// here; submit_encode_only resets each fence on submit.
let fences: Vec<vk::Fence> = self.slots.iter().map(|s| s.encode_fence).collect();
unsafe {
self.context
.device()
.wait_for_fences(&[self.encode_fence], true, u64::MAX)
.wait_for_fences(&fences, true, u64::MAX)
.map_err(|e| {
PixelForgeError::Synchronization(format!(
"Failed to wait for encode fence: {:?}",
"Failed to wait for encode fences: {:?}",
e
))
})?;
Expand Down
Loading
Loading