From df91e5f26b7a62c442327b584552fc5b4cfb4126 Mon Sep 17 00:00:00 2001 From: Hans Gaiser Date: Tue, 5 May 2026 22:53:07 +0200 Subject: [PATCH] Update ash to main branch. --- src/converter/mod.rs | 135 ++++++++++++++++++++----- src/converter/pipeline.rs | 154 ++++++++++++++++++++--------- src/encoder/dpb/h265.rs | 72 +++++++++----- src/encoder/dpb/reference_lists.rs | 19 ++-- src/encoder/resources.rs | 60 +++++++++++ src/vulkan.rs | 127 +++++++++++++++++++----- 6 files changed, 442 insertions(+), 125 deletions(-) diff --git a/src/converter/mod.rs b/src/converter/mod.rs index 32def67..1133922 100644 --- a/src/converter/mod.rs +++ b/src/converter/mod.rs @@ -199,8 +199,6 @@ pub struct ColorConverter { descriptor_set_layout: vk::DescriptorSetLayout, pipeline_layout: vk::PipelineLayout, pipeline: vk::Pipeline, - descriptor_pool: vk::DescriptorPool, - descriptor_set: vk::DescriptorSet, // Sampler for texelFetch on the source image. sampler: vk::Sampler, @@ -212,6 +210,24 @@ pub struct ColorConverter { output_buffer: vk::Buffer, output_memory: vk::DeviceMemory, + // Descriptor buffer (holds captured descriptor data). + descriptor_buffer: vk::Buffer, + descriptor_buffer_memory: vk::DeviceMemory, + descriptor_buffer_address: vk::DeviceAddress, + descriptor_buffer_usage: vk::BufferUsageFlags, + descriptor_buffer_ptr: *mut u8, + sampler_capture_size: u32, + image_capture_size: u32, + buffer_capture_size: u32, + // Cached descriptor buffer device and per-frame capture buffers. + ext_device: ash::ext::descriptor_buffer::Device, + sampler_data: Vec, + image_data: Vec, + buffer_data: Vec, + // Descriptor set layout binding offsets for correct payload placement. + binding0_offset: u64, + binding1_offset: u64, + // Command resources. command_pool: vk::CommandPool, command_buffer: vk::CommandBuffer, @@ -539,20 +555,6 @@ impl ColorConverter { let device = self.context.device(); - // Update descriptor set binding 0 with the source image view. - let image_info = vk::DescriptorImageInfo::default() - .sampler(self.sampler) - .image_view(src_view) - .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL); - - let write = vk::WriteDescriptorSet::default() - .dst_set(self.descriptor_set) - .dst_binding(0) - .descriptor_type(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) - .image_info(std::slice::from_ref(&image_info)); - - unsafe { device.update_descriptor_sets(&[write], &[]) }; - // Reset and record command buffer. unsafe { device @@ -626,20 +628,105 @@ impl ColorConverter { &[], ); - // Bind pipeline and descriptor set. + // Bind pipeline. device.cmd_bind_pipeline( self.command_buffer, vk::PipelineBindPoint::COMPUTE, self.pipeline, ); - device.cmd_bind_descriptor_sets( + // --- Opaque capture descriptors into descriptor buffer --- + // 1. Capture sampler descriptor data into preallocated buffer. + let sampler_capture_info = + vk::SamplerCaptureDescriptorDataInfoEXT::default().sampler(self.sampler); + self.ext_device + .get_sampler_opaque_capture_descriptor_data( + &sampler_capture_info, + self.sampler_data.as_mut_slice(), + ) + .map_err(|e| PixelForgeError::CommandBuffer(format!("sampler capture: {}", e)))?; + + // 2. Capture image view descriptor data into preallocated buffer. + let image_capture_info = + vk::ImageViewCaptureDescriptorDataInfoEXT::default().image_view(src_view); + self.ext_device + .get_image_view_opaque_capture_descriptor_data( + &image_capture_info, + self.image_data.as_mut_slice(), + ) + .map_err(|e| { + PixelForgeError::CommandBuffer(format!("image view capture: {}", e)) + })?; + + // 3. Capture output buffer descriptor data into preallocated buffer. + let buffer_capture_info = + vk::BufferCaptureDescriptorDataInfoEXT::default().buffer(self.output_buffer); + self.ext_device + .get_buffer_opaque_capture_descriptor_data( + &buffer_capture_info, + self.buffer_data.as_mut_slice(), + ) + .map_err(|e| PixelForgeError::CommandBuffer(format!("buffer capture: {}", e)))?; + + // 4. Write captured data into descriptor buffer (persistent map, HOST_COHERENT). + // + // The descriptor buffer capture functions return driver-defined descriptor payloads + // in the format expected by the descriptor buffer. For combined image sampler + // descriptors (binding 0), the sampler and image view captures are placed + // consecutively at the binding's offset. + // + // Layout: + // Offset binding0_offset: Sampler + image view capture payloads (binding 0) + // Offset binding1_offset: Buffer capture payload (binding 1) + + let sampler_offset = self.binding0_offset as usize; + let image_offset = sampler_offset + self.sampler_capture_size as usize; + let buffer_offset = self.binding1_offset as usize; + + // Write sampler capture payload. + std::ptr::copy_nonoverlapping( + self.sampler_data.as_ptr(), + self.descriptor_buffer_ptr.add(sampler_offset), + self.sampler_capture_size as usize, + ); + + // Write image view capture payload. + std::ptr::copy_nonoverlapping( + self.image_data.as_ptr(), + self.descriptor_buffer_ptr.add(image_offset), + self.image_capture_size as usize, + ); + + // Write buffer capture payload. + std::ptr::copy_nonoverlapping( + self.buffer_data.as_ptr(), + self.descriptor_buffer_ptr.add(buffer_offset), + self.buffer_capture_size as usize, + ); + + // --- Bind descriptor buffers --- + let binding_info = vk::DescriptorBufferBindingInfoEXT::default() + .address(self.descriptor_buffer_address) + .usage(self.descriptor_buffer_usage); + + self.ext_device.cmd_bind_descriptor_buffers( + self.command_buffer, + std::slice::from_ref(&binding_info), + ); + + // Associate set 0 in the pipeline layout with the bound descriptor buffer. + // This is required because descriptor buffers use offset-based binding. + // The base offset is 0 since all payloads are placed at their binding offsets. + let buffer_indices = [0u32]; + let offsets = [0 as vk::DeviceSize]; + + self.ext_device.cmd_set_descriptor_buffer_offsets( self.command_buffer, vk::PipelineBindPoint::COMPUTE, self.pipeline_layout, - 0, - &[self.descriptor_set], - &[], + 0, // first_set + &buffer_indices, + &offsets, ); // Push constants: width, height, input_format, output_format, color_space, full_range, sdr_white_nits. @@ -846,10 +933,14 @@ impl Drop for ColorConverter { device.destroy_buffer(self.output_buffer, None); device.free_memory(self.output_memory, None); + // Destroy descriptor buffer and its memory. + device.unmap_memory(self.descriptor_buffer_memory); + device.destroy_buffer(self.descriptor_buffer, None); + device.free_memory(self.descriptor_buffer_memory, None); + // Destroy pipeline resources. device.destroy_pipeline(self.pipeline, None); device.destroy_pipeline_layout(self.pipeline_layout, None); - device.destroy_descriptor_pool(self.descriptor_pool, None); device.destroy_descriptor_set_layout(self.descriptor_set_layout, None); // Destroy command resources. diff --git a/src/converter/pipeline.rs b/src/converter/pipeline.rs index 62d217b..7b3956d 100644 --- a/src/converter/pipeline.rs +++ b/src/converter/pipeline.rs @@ -1,5 +1,4 @@ //! Vulkan compute pipeline creation for color conversion. - use super::{ColorConverter, ColorConverterConfig}; use crate::encoder::resources::find_memory_type; use crate::error::{PixelForgeError, Result}; @@ -11,7 +10,15 @@ pub fn create_converter( context: VideoContext, config: ColorConverterConfig, ) -> Result { + if !context.has_descriptor_buffer() { + return Err(PixelForgeError::NoSuitableDevice( + "VK_EXT_descriptor_buffer with capture-replay is required but not available on this device".to_string(), + )); + } + let device = context.device(); + let instance = context.instance(); + let physical_device = context.physical_device(); // Create descriptor set layout. let bindings = [ @@ -29,7 +36,9 @@ pub fn create_converter( .stage_flags(vk::ShaderStageFlags::COMPUTE), ]; - let layout_info = vk::DescriptorSetLayoutCreateInfo::default().bindings(&bindings); + let layout_info = vk::DescriptorSetLayoutCreateInfo::default() + .flags(vk::DescriptorSetLayoutCreateFlags::DESCRIPTOR_BUFFER_EXT) + .bindings(&bindings); let descriptor_set_layout = unsafe { device.create_descriptor_set_layout(&layout_info, None) } .map_err(|e| PixelForgeError::ResourceCreation(e.to_string()))?; @@ -73,32 +82,6 @@ pub fn create_converter( // Destroy shader module (no longer needed after pipeline creation) unsafe { device.destroy_shader_module(shader_module, None) }; - // Create descriptor pool. - let pool_sizes = [ - vk::DescriptorPoolSize::default() - .ty(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) - .descriptor_count(1), - vk::DescriptorPoolSize::default() - .ty(vk::DescriptorType::STORAGE_BUFFER) - .descriptor_count(1), - ]; - - let pool_info = vk::DescriptorPoolCreateInfo::default() - .flags(vk::DescriptorPoolCreateFlags::FREE_DESCRIPTOR_SET) - .max_sets(1) - .pool_sizes(&pool_sizes); - - let descriptor_pool = unsafe { device.create_descriptor_pool(&pool_info, None) } - .map_err(|e| PixelForgeError::ResourceCreation(e.to_string()))?; - - // Allocate descriptor set. - let alloc_info = vk::DescriptorSetAllocateInfo::default() - .descriptor_pool(descriptor_pool) - .set_layouts(std::slice::from_ref(&descriptor_set_layout)); - - let descriptor_set = unsafe { device.allocate_descriptor_sets(&alloc_info) } - .map_err(|e| PixelForgeError::ResourceCreation(e.to_string()))?[0]; - // Calculate output buffer size. let output_size = config .output_format @@ -127,20 +110,87 @@ pub fn create_converter( vk::MemoryPropertyFlags::DEVICE_LOCAL, )?; - // Write only the output buffer descriptor now; the source image descriptor - // is written per-frame in convert() when we know the actual source ImageView. - let output_buffer_info = vk::DescriptorBufferInfo::default() - .buffer(output_buffer) - .offset(0) - .range(output_size as vk::DeviceSize); - - let writes = [vk::WriteDescriptorSet::default() - .dst_set(descriptor_set) - .dst_binding(1) - .descriptor_type(vk::DescriptorType::STORAGE_BUFFER) - .buffer_info(std::slice::from_ref(&output_buffer_info))]; - - unsafe { device.update_descriptor_sets(&writes, &[]) }; + // Query descriptor buffer properties to determine correct capture sizes. + let mut db_props = vk::PhysicalDeviceDescriptorBufferPropertiesEXT::default(); + let mut props = vk::PhysicalDeviceProperties2 { + p_next: &mut db_props as *mut _ as *mut _, + ..Default::default() + }; + unsafe { + instance.get_physical_device_properties2(physical_device, &mut props); + } + let sampler_cap_size = db_props.sampler_capture_replay_descriptor_data_size; + let image_cap_size = db_props.image_view_capture_replay_descriptor_data_size; + let buffer_cap_size = db_props.buffer_capture_replay_descriptor_data_size; + + // Query descriptor set layout size and binding offsets for correct buffer sizing. + let ext_device = + ash::ext::descriptor_buffer::Device::load(context.instance(), context.device()); + let vk_device = context.device().handle(); + let mut layout_size = 0u64; + unsafe { + (ext_device.fp().get_descriptor_set_layout_size_ext)( + vk_device, + descriptor_set_layout, + &mut layout_size, + ); + } + let binding0_offset = unsafe { + let mut offset = 0u64; + (ext_device.fp().get_descriptor_set_layout_binding_offset_ext)( + vk_device, + descriptor_set_layout, + 0, + &mut offset, + ); + offset + }; + let binding1_offset = unsafe { + let mut offset = 0u64; + (ext_device.fp().get_descriptor_set_layout_binding_offset_ext)( + vk_device, + descriptor_set_layout, + 1, + &mut offset, + ); + offset + }; + + // Descriptor buffer layout: + // Offset 0: Sampler + image view capture payload (binding 0) + // Offset X: Buffer capture payload (binding 1) + // The total size is the layout size which accounts for alignment. + let descriptor_buffer_size: vk::DeviceSize = layout_size as vk::DeviceSize; + + let (descriptor_buffer, descriptor_buffer_memory) = + crate::encoder::resources::create_buffer_with_device_address( + device, + context.memory_properties(), + descriptor_buffer_size, + vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS + | vk::BufferUsageFlags::RESOURCE_DESCRIPTOR_BUFFER_EXT, + vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT, + )?; + + // Get the buffer's device address for binding descriptor buffers. + // cmdBindDescriptorBuffers requires the buffer's device address, not the memory capture address. + let buf_addr_info = vk::BufferDeviceAddressInfo::default().buffer(descriptor_buffer); + let descriptor_buffer_address = unsafe { device.get_buffer_device_address(&buf_addr_info) }; + + // Persistent map the descriptor buffer (HOST_COHERENT, no flush needed). + let descriptor_buffer_ptr = unsafe { + device + .map_memory( + descriptor_buffer_memory, + 0, + vk::WHOLE_SIZE, + vk::MemoryMapFlags::empty(), + ) + .map_err(|e| { + PixelForgeError::ResourceCreation(format!("map descriptor buffer: {}", e)) + })? + }; + let descriptor_buffer_ptr = descriptor_buffer_ptr as *mut u8; // Create command pool for compute queue. let pool_info = vk::CommandPoolCreateInfo::default() @@ -170,8 +220,6 @@ pub fn create_converter( descriptor_set_layout, pipeline_layout, pipeline, - descriptor_pool, - descriptor_set, sampler, cached_src_view: None, output_buffer, @@ -179,6 +227,24 @@ pub fn create_converter( command_pool, command_buffer, fence, + // Descriptor buffer fields. + descriptor_buffer, + descriptor_buffer_memory, + descriptor_buffer_address, + descriptor_buffer_usage: vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS + | vk::BufferUsageFlags::RESOURCE_DESCRIPTOR_BUFFER_EXT, + descriptor_buffer_ptr, + sampler_capture_size: sampler_cap_size as u32, + image_capture_size: image_cap_size as u32, + buffer_capture_size: buffer_cap_size as u32, + // Cached descriptor buffer device and capture buffers. + ext_device, + sampler_data: vec![0u8; sampler_cap_size], + image_data: vec![0u8; image_cap_size], + buffer_data: vec![0u8; buffer_cap_size], + // Layout info for correct offset computation. + binding0_offset, + binding1_offset, }) } diff --git a/src/encoder/dpb/h265.rs b/src/encoder/dpb/h265.rs index c4b1759..86e1e54 100644 --- a/src/encoder/dpb/h265.rs +++ b/src/encoder/dpb/h265.rs @@ -7,10 +7,7 @@ //! - Reference picture list construction for P and B frames //! - Temporal layer support //! - CRA (Clean Random Access) handling - -// Many loops in this module use index-based iteration because they access. -// multiple arrays with the same index or use the index for bit operations. -#![allow(clippy::needless_range_loop)] +use std::cmp::Reverse; use super::entry::{DpbEntry, DpbState, MarkingState}; use super::reference_lists::{H265ReferenceListBuilder, ReferenceList}; @@ -276,8 +273,12 @@ impl DpbH265 { let mut k = 0usize; // Process S0 (negative) - for i in 0..short_term_rps.num_negative_pics as usize { - let poc = current_poc + delta_poc_s0[i]; + for (i, &delta) in delta_poc_s0 + .iter() + .enumerate() + .take(short_term_rps.num_negative_pics as usize) + { + let poc = current_poc + delta; if (short_term_rps.used_by_curr_pic_s0_flag >> i) & 1 != 0 { poc_st_curr_before[j] = poc; j += 1; @@ -290,8 +291,12 @@ impl DpbH265 { j = 0; // Process S1 (positive) - for i in 0..short_term_rps.num_positive_pics as usize { - let poc = current_poc + delta_poc_s1[i]; + for (i, &delta) in delta_poc_s1 + .iter() + .enumerate() + .take(short_term_rps.num_positive_pics as usize) + { + let poc = current_poc + delta; if (short_term_rps.used_by_curr_pic_s1_flag >> i) & 1 != 0 { poc_st_curr_after[j] = poc; j += 1; @@ -306,8 +311,12 @@ impl DpbH265 { // Process long-term references. j = 0; k = 0; - for i in 0..num_long_term_pics as usize { - let poc_lt = long_term_poc_lsb[i] as i32; + for (i, &poc_lsb) in long_term_poc_lsb + .iter() + .enumerate() + .take(num_long_term_pics as usize) + { + let poc_lt = poc_lsb as i32; if (used_by_curr_pic_lt >> i) & 1 != 0 { poc_lt_curr[j] = poc_lt; j += 1; @@ -320,13 +329,17 @@ impl DpbH265 { // Map POCs to DPB indices. // stCurrBefore - for i in 0..self.num_poc_st_curr_before as usize { + for (i, &poc) in poc_st_curr_before + .iter() + .enumerate() + .take(self.num_poc_st_curr_before as usize) + { ref_pic_set.st_curr_before[i] = -1; for d in 0..self.max_dpb_size as usize { let entry = &self.entries[d]; if entry.state == DpbState::InUse && entry.marking == MarkingState::ShortTerm - && entry.pic_order_cnt == poc_st_curr_before[i] + && entry.pic_order_cnt == poc { ref_pic_set.st_curr_before[i] = d as i8; break; @@ -336,13 +349,17 @@ impl DpbH265 { ref_pic_set.num_st_curr_before = self.num_poc_st_curr_before as u8; // stCurrAfter - for i in 0..self.num_poc_st_curr_after as usize { + for (i, &poc) in poc_st_curr_after + .iter() + .enumerate() + .take(self.num_poc_st_curr_after as usize) + { ref_pic_set.st_curr_after[i] = -1; for d in 0..self.max_dpb_size as usize { let entry = &self.entries[d]; if entry.state == DpbState::InUse && entry.marking == MarkingState::ShortTerm - && entry.pic_order_cnt == poc_st_curr_after[i] + && entry.pic_order_cnt == poc { ref_pic_set.st_curr_after[i] = d as i8; break; @@ -352,14 +369,18 @@ impl DpbH265 { ref_pic_set.num_st_curr_after = self.num_poc_st_curr_after as u8; // ltCurr - for i in 0..self.num_poc_lt_curr as usize { + for (i, &poc) in poc_lt_curr + .iter() + .enumerate() + .take(self.num_poc_lt_curr as usize) + { ref_pic_set.lt_curr[i] = -1; let mask = self.max_poc_lsb - 1; for d in 0..self.max_dpb_size as usize { let entry = &self.entries[d]; if entry.state == DpbState::InUse && entry.marking != MarkingState::Unused - && (entry.pic_order_cnt & mask) == poc_lt_curr[i] + && (entry.pic_order_cnt & mask) == poc { ref_pic_set.lt_curr[i] = d as i8; // Mark as long-term. @@ -388,8 +409,8 @@ impl DpbH265 { } // stFoll and ltFoll would also be marked as in_use - for i in 0..self.max_dpb_size as usize { - if !in_use[i] && self.entries[i].marking != MarkingState::Unused { + for (i, used) in in_use.iter().enumerate().take(self.max_dpb_size as usize) { + if !used && self.entries[i].marking != MarkingState::Unused { self.entries[i].mark_unused(); } } @@ -442,10 +463,10 @@ impl DpbH265 { } // Sort negative refs by POC descending (closest to current first) - negative_refs.sort_by(|a, b| b.0.cmp(&a.0)); + negative_refs.sort_by_key(|b| Reverse(b.0)); // Sort positive refs by POC ascending (closest to current first) - positive_refs.sort_by(|a, b| a.0.cmp(&b.0)); + positive_refs.sort_by_key(|a| a.0); // Limit to DPB size - 1. let max_refs = (self.max_dpb_size - 1) as usize; @@ -623,9 +644,14 @@ impl DecodedPictureBufferTrait for DpbH265 { // Collect reference POCs and long-term flags first. let mut ref_pocs = [0i32; MAX_DPB_SIZE]; let mut long_term_mask = 0u32; - for i in 0..self.max_dpb_size as usize { - ref_pocs[i] = self.entries[i].pic_order_cnt; - if self.entries[i].marking == MarkingState::LongTerm { + for (i, entry) in self + .entries + .iter() + .enumerate() + .take(self.max_dpb_size as usize) + { + ref_pocs[i] = entry.pic_order_cnt; + if entry.marking == MarkingState::LongTerm { long_term_mask |= 1 << i; } } diff --git a/src/encoder/dpb/reference_lists.rs b/src/encoder/dpb/reference_lists.rs index 370ffeb..a670c8f 100644 --- a/src/encoder/dpb/reference_lists.rs +++ b/src/encoder/dpb/reference_lists.rs @@ -1,7 +1,6 @@ //! Reference picture list construction and management. -// Loops in this module use index-based iteration because they need the DPB slot index. -#![allow(clippy::needless_range_loop)] +use std::cmp::Reverse; use super::entry::{DpbEntry, DpbState, MarkingState}; use super::{PictureType, MAX_DPB_SIZE, MAX_REF_LIST_SIZE}; @@ -116,8 +115,7 @@ impl H264ReferenceListBuilder { let mut long_term_refs: Vec = Vec::new(); // Collect short-term and long-term references. - for i in 0..dpb_size as usize { - let entry = &dpb[i]; + for (i, entry) in dpb.iter().enumerate().take(dpb_size as usize) { if entry.state != DpbState::InUse { continue; } @@ -157,10 +155,10 @@ impl H264ReferenceListBuilder { } // Sort short-term by descending PicNum. - short_term_refs.sort_by(|a, b| b.pic_num.cmp(&a.pic_num)); + short_term_refs.sort_by_key(|b| Reverse(b.pic_num)); // Sort long-term by ascending LongTermPicNum. - long_term_refs.sort_by(|a, b| a.long_term_pic_num.cmp(&b.long_term_pic_num)); + long_term_refs.sort_by_key(|a| a.long_term_pic_num); // Build L0: short-term first, then long-term. for r in short_term_refs { @@ -197,8 +195,7 @@ impl H264ReferenceListBuilder { let mut long_term_refs: Vec = Vec::new(); // Collect references categorized by POC. - for i in 0..dpb_size as usize { - let entry = &dpb[i]; + for (i, entry) in dpb.iter().enumerate().take(dpb_size as usize) { if entry.state != DpbState::InUse { continue; } @@ -237,13 +234,13 @@ impl H264ReferenceListBuilder { } // Sort refs_before by descending POC (closest to current first) - refs_before.sort_by(|a, b| b.poc.cmp(&a.poc)); + refs_before.sort_by_key(|b| Reverse(b.poc)); // Sort refs_after by ascending POC (closest to current first) - refs_after.sort_by(|a, b| a.poc.cmp(&b.poc)); + refs_after.sort_by_key(|a| a.poc); // Sort long-term by ascending LongTermPicNum. - long_term_refs.sort_by(|a, b| a.long_term_pic_num.cmp(&b.long_term_pic_num)); + long_term_refs.sort_by_key(|a| a.long_term_pic_num); // Build L0: refs_before, refs_after, long_term. for r in &refs_before { diff --git a/src/encoder/resources.rs b/src/encoder/resources.rs index 02af49c..d63d157 100644 --- a/src/encoder/resources.rs +++ b/src/encoder/resources.rs @@ -135,6 +135,66 @@ pub(crate) fn make_codec_name(codec_name: &[u8]) -> [std::ffi::c_char; 256] { name } +/// Create a buffer that requires device addresses (SHADER_DEVICE_ADDRESS usage). +/// +/// This allocates memory with `VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT` so that +/// `get_buffer_device_address` returns a valid address. +pub(crate) fn create_buffer_with_device_address( + device: &ash::Device, + memory_properties: &vk::PhysicalDeviceMemoryProperties, + size: vk::DeviceSize, + usage: vk::BufferUsageFlags, + properties: vk::MemoryPropertyFlags, +) -> Result<(vk::Buffer, vk::DeviceMemory)> { + let buffer_info = vk::BufferCreateInfo::default() + .size(size) + .usage(usage) + .sharing_mode(vk::SharingMode::EXCLUSIVE); + + let buffer = unsafe { device.create_buffer(&buffer_info, None) } + .map_err(|e| PixelForgeError::ResourceCreation(format!("buffer creation: {}", e)))?; + + let mem_requirements = unsafe { device.get_buffer_memory_requirements(buffer) }; + + let memory_type_index = find_memory_type( + memory_properties, + mem_requirements.memory_type_bits, + properties, + ) + .ok_or_else(|| { + PixelForgeError::MemoryAllocation(format!( + "No suitable memory type for buffer with properties {:?}", + properties + )) + })?; + + let mut alloc_flags_info = + vk::MemoryAllocateFlagsInfo::default().flags(vk::MemoryAllocateFlags::DEVICE_ADDRESS); + let mut alloc_info = vk::MemoryAllocateInfo::default() + .allocation_size(mem_requirements.size) + .memory_type_index(memory_type_index); + alloc_info.p_next = &mut alloc_flags_info as *mut _ as *mut _; + + let memory = match unsafe { device.allocate_memory(&alloc_info, None) } { + Ok(m) => m, + Err(e) => { + unsafe { device.destroy_buffer(buffer, None) }; + return Err(PixelForgeError::MemoryAllocation(e.to_string())); + } + }; + + match unsafe { device.bind_buffer_memory(buffer, memory, 0) } { + Ok(()) => Ok((buffer, memory)), + Err(e) => { + unsafe { + device.destroy_buffer(buffer, None); + device.free_memory(memory, None); + } + Err(PixelForgeError::MemoryAllocation(e.to_string())) + } + } +} + pub(crate) fn find_memory_type( memory_props: &vk::PhysicalDeviceMemoryProperties, type_filter: u32, diff --git a/src/vulkan.rs b/src/vulkan.rs index 9fc5d1b..f3fa382 100644 --- a/src/vulkan.rs +++ b/src/vulkan.rs @@ -1,9 +1,4 @@ //! Vulkan context and initialization for video encoding. -//! -//! Note: Vulkan p_next chaining requires creating default structs and then assigning p_next, -//! which triggers clippy::field_reassign_with_default. This is the correct pattern for Vulkan. -#![allow(clippy::field_reassign_with_default)] - use crate::encoder::Codec; use crate::error::{PixelForgeError, Result}; use ash::vk; @@ -82,6 +77,7 @@ struct VideoContextInner { memory_properties: vk::PhysicalDeviceMemoryProperties, device_properties: vk::PhysicalDeviceProperties, supported_encode_codecs: Vec, + has_descriptor_buffer: bool, } impl Drop for VideoContextInner { @@ -158,6 +154,11 @@ impl VideoContext { pub fn device_properties(&self) -> &vk::PhysicalDeviceProperties { &self.inner.device_properties } + + /// Returns true if `VK_EXT_descriptor_buffer` is available and enabled. + pub fn has_descriptor_buffer(&self) -> bool { + self.inner.has_descriptor_buffer + } } impl VideoContext { @@ -242,6 +243,7 @@ impl VideoContext { let mut transfer_queue_family = u32::MAX; let mut compute_queue_family = u32::MAX; let mut supported_encode_codecs = Vec::new(); + let mut has_descriptor_buffer_ext = false; for physical_device in physical_devices { let props = unsafe { instance.get_physical_device_properties(physical_device) }; @@ -307,6 +309,9 @@ impl VideoContext { }) }; + // Check if descriptor buffer extension is available. + has_descriptor_buffer_ext = has_extension(ash::ext::descriptor_buffer::NAME); + // Only check codec support if the extension exists if has_extension(ash::khr::video_encode_h264::NAME) && Self::check_h264_encode_support(&entry, &instance, physical_device, eq) @@ -437,6 +442,13 @@ impl VideoContext { } } + // Enable VK_EXT_descriptor_buffer extension (required for descriptor buffer API). + if has_descriptor_buffer_ext { + push_ext(ash::ext::descriptor_buffer::NAME.as_ptr()); + } else { + warn!("VK_EXT_descriptor_buffer not available on this device"); + } + // Enable synchronization2 feature. let mut sync2_features = vk::PhysicalDeviceSynchronization2Features::default().synchronization2(true); @@ -472,6 +484,58 @@ impl VideoContext { sync2_features.p_next = (&mut ycbcr_features as *mut vk::PhysicalDeviceSamplerYcbcrConversionFeatures).cast(); + // Query descriptor buffer and buffer device address feature support. + let mut desc_buf_features = vk::PhysicalDeviceDescriptorBufferFeaturesEXT::default(); + let mut buffer_device_address_features = + vk::PhysicalDeviceBufferDeviceAddressFeatures::default(); + + if has_descriptor_buffer_ext { + let mut feat2 = vk::PhysicalDeviceFeatures2 { + p_next: (&mut desc_buf_features + as *mut vk::PhysicalDeviceDescriptorBufferFeaturesEXT) + .cast(), + ..Default::default() + }; + unsafe { + instance.get_physical_device_features2(physical_device, &mut feat2); + } + let desc_buf_supported = desc_buf_features.descriptor_buffer != 0 + && desc_buf_features.descriptor_buffer_capture_replay != 0; + + // Query buffer device address support. + let mut feat2_bda = vk::PhysicalDeviceFeatures2 { + p_next: (&mut buffer_device_address_features + as *mut vk::PhysicalDeviceBufferDeviceAddressFeatures) + .cast(), + ..Default::default() + }; + unsafe { + instance.get_physical_device_features2(physical_device, &mut feat2_bda); + } + + if desc_buf_supported && buffer_device_address_features.buffer_device_address != 0 { + desc_buf_features.descriptor_buffer = 1; + desc_buf_features.descriptor_buffer_capture_replay = 1; + } else if desc_buf_supported { + warn!("VK_EXT_descriptor_buffer extension present but bufferDeviceAddress not supported; descriptor buffer will not be enabled"); + } + } + + // Build the feature chain: desc_buf_features -> buffer_device_address_features -> sync2_features -> ... + // Only chain desc_buf_features if the extension was available. + if has_descriptor_buffer_ext { + buffer_device_address_features.p_next = + (&mut sync2_features as *mut vk::PhysicalDeviceSynchronization2Features).cast(); + desc_buf_features.p_next = (&mut buffer_device_address_features + as *mut vk::PhysicalDeviceBufferDeviceAddressFeatures) + .cast(); + } + + // Store whether descriptor buffer is available for use by callers. + let has_descriptor_buffer = has_descriptor_buffer_ext + && desc_buf_features.descriptor_buffer != 0 + && desc_buf_features.descriptor_buffer_capture_replay != 0; + // Log all extensions being enabled debug!("Enabling {} device extensions:", extension_names.len()); for ext_name_ptr in &extension_names { @@ -484,8 +548,14 @@ impl VideoContext { .enabled_extension_names(&extension_names); // Attach the chain to device_create_info. - device_create_info.p_next = - (&mut sync2_features as *mut vk::PhysicalDeviceSynchronization2Features).cast(); + // When descriptor buffer is available, the chain is: + // desc_buf_features -> buffer_device_address_features -> sync2_features -> ... + // When descriptor buffer is not available, only sync2_features is chained. + device_create_info.p_next = if has_descriptor_buffer_ext { + (&mut desc_buf_features as *mut vk::PhysicalDeviceDescriptorBufferFeaturesEXT).cast() + } else { + (&mut sync2_features as *mut vk::PhysicalDeviceSynchronization2Features).cast() + }; let device = unsafe { instance.create_device(physical_device, &device_create_info, None) } .map_err(|e| PixelForgeError::DeviceCreation(e.to_string()))?; @@ -518,6 +588,7 @@ impl VideoContext { memory_properties, device_properties, supported_encode_codecs, + has_descriptor_buffer, }), }) } @@ -547,13 +618,15 @@ impl VideoContext { profile_info.p_next = (&mut h264_profile as *mut vk::VideoEncodeH264ProfileInfoKHR).cast(); // Create capabilities structures. - let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR::default(); let mut h264_capabilities = vk::VideoEncodeH264CapabilitiesKHR::default(); - encode_capabilities.p_next = - &mut h264_capabilities as *mut vk::VideoEncodeH264CapabilitiesKHR as *mut _; - let mut capabilities = vk::VideoCapabilitiesKHR::default(); - capabilities.p_next = - &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _; + let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR { + p_next: &mut h264_capabilities as *mut vk::VideoEncodeH264CapabilitiesKHR as *mut _, + ..Default::default() + }; + let mut capabilities = vk::VideoCapabilitiesKHR { + p_next: &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _, + ..Default::default() + }; // Query capabilities. let result = unsafe { @@ -610,13 +683,15 @@ impl VideoContext { profile_info.p_next = (&mut h265_profile as *mut vk::VideoEncodeH265ProfileInfoKHR).cast(); // Create capabilities structures. - let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR::default(); let mut h265_capabilities = vk::VideoEncodeH265CapabilitiesKHR::default(); - encode_capabilities.p_next = - &mut h265_capabilities as *mut vk::VideoEncodeH265CapabilitiesKHR as *mut _; - let mut capabilities = vk::VideoCapabilitiesKHR::default(); - capabilities.p_next = - &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _; + let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR { + p_next: &mut h265_capabilities as *mut vk::VideoEncodeH265CapabilitiesKHR as *mut _, + ..Default::default() + }; + let mut capabilities = vk::VideoCapabilitiesKHR { + p_next: &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _, + ..Default::default() + }; // Query capabilities. let result = unsafe { @@ -672,13 +747,15 @@ impl VideoContext { profile_info.p_next = (&mut av1_profile as *mut vk::VideoEncodeAV1ProfileInfoKHR).cast(); // Create capabilities structures. - let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR::default(); let mut av1_capabilities = vk::VideoEncodeAV1CapabilitiesKHR::default(); - encode_capabilities.p_next = - &mut av1_capabilities as *mut vk::VideoEncodeAV1CapabilitiesKHR as *mut _; - let mut capabilities = vk::VideoCapabilitiesKHR::default(); - capabilities.p_next = - &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _; + let mut encode_capabilities = vk::VideoEncodeCapabilitiesKHR { + p_next: &mut av1_capabilities as *mut vk::VideoEncodeAV1CapabilitiesKHR as *mut _, + ..Default::default() + }; + let mut capabilities = vk::VideoCapabilitiesKHR { + p_next: &mut encode_capabilities as *mut vk::VideoEncodeCapabilitiesKHR as *mut _, + ..Default::default() + }; // Query capabilities. let result = unsafe {