From 83ecbdb27cf9c91a0463de2181b6454898a3d9a6 Mon Sep 17 00:00:00 2001 From: Ryan Bateman Date: Thu, 7 May 2026 21:57:44 -0800 Subject: [PATCH] fix(converter): use vkGetDescriptorEXT for descriptor buffer population --- src/converter/mod.rs | 130 ++++++++++++++++---------------------- src/converter/pipeline.rs | 40 ++++++++---- 2 files changed, 83 insertions(+), 87 deletions(-) diff --git a/src/converter/mod.rs b/src/converter/mod.rs index 1133922..0aa50b1 100644 --- a/src/converter/mod.rs +++ b/src/converter/mod.rs @@ -206,25 +206,27 @@ pub struct ColorConverter { // Cached ImageView for the source image (avoids per-frame recreation). cached_src_view: Option<(vk::Image, vk::ImageView)>, - // Output buffer (compute shader writes here) + // Output buffer (compute shader writes here). output_buffer: vk::Buffer, output_memory: vk::DeviceMemory, + // Output buffer size and device address. The address is fed into + // `VkDescriptorAddressInfoEXT` when populating the binding-1 descriptor. + output_buffer_size: usize, + output_buffer_address: vk::DeviceAddress, - // Descriptor buffer (holds captured descriptor data). + // Descriptor buffer (mapped, descriptors are written here per-frame). descriptor_buffer: vk::Buffer, descriptor_buffer_memory: vk::DeviceMemory, descriptor_buffer_address: vk::DeviceAddress, descriptor_buffer_usage: vk::BufferUsageFlags, descriptor_buffer_ptr: *mut u8, - sampler_capture_size: u32, - image_capture_size: u32, - buffer_capture_size: u32, - // Cached descriptor buffer device and per-frame capture buffers. + // Descriptor sizes for each binding type, queried from + // `VkPhysicalDeviceDescriptorBufferPropertiesEXT`. + combined_image_sampler_descriptor_size: usize, + storage_buffer_descriptor_size: usize, + // Loaded descriptor-buffer extension device for `vkGetDescriptorEXT`. ext_device: ash::ext::descriptor_buffer::Device, - sampler_data: Vec, - image_data: Vec, - buffer_data: Vec, - // Descriptor set layout binding offsets for correct payload placement. + // Per-binding offsets within the descriptor buffer. binding0_offset: u64, binding1_offset: u64, @@ -635,74 +637,54 @@ impl ColorConverter { self.pipeline, ); - // --- Opaque capture descriptors into descriptor buffer --- - // 1. Capture sampler descriptor data into preallocated buffer. - let sampler_capture_info = - vk::SamplerCaptureDescriptorDataInfoEXT::default().sampler(self.sampler); - self.ext_device - .get_sampler_opaque_capture_descriptor_data( - &sampler_capture_info, - self.sampler_data.as_mut_slice(), - ) - .map_err(|e| PixelForgeError::CommandBuffer(format!("sampler capture: {}", e)))?; - - // 2. Capture image view descriptor data into preallocated buffer. - let image_capture_info = - vk::ImageViewCaptureDescriptorDataInfoEXT::default().image_view(src_view); - self.ext_device - .get_image_view_opaque_capture_descriptor_data( - &image_capture_info, - self.image_data.as_mut_slice(), - ) - .map_err(|e| { - PixelForgeError::CommandBuffer(format!("image view capture: {}", e)) - })?; - - // 3. Capture output buffer descriptor data into preallocated buffer. - let buffer_capture_info = - vk::BufferCaptureDescriptorDataInfoEXT::default().buffer(self.output_buffer); - self.ext_device - .get_buffer_opaque_capture_descriptor_data( - &buffer_capture_info, - self.buffer_data.as_mut_slice(), - ) - .map_err(|e| PixelForgeError::CommandBuffer(format!("buffer capture: {}", e)))?; - - // 4. Write captured data into descriptor buffer (persistent map, HOST_COHERENT). + // --- Populate descriptors directly into the descriptor buffer --- // - // The descriptor buffer capture functions return driver-defined descriptor payloads - // in the format expected by the descriptor buffer. For combined image sampler - // descriptors (binding 0), the sampler and image view captures are placed - // consecutively at the binding's offset. + // Use `vkGetDescriptorEXT` for runtime descriptor population. (The + // previous implementation mistakenly used the + // `vkGet*OpaqueCaptureDescriptorDataEXT` family — those produce + // opaque payloads for capture/replay tooling and are NOT the + // descriptor data the GPU consumes at binding offsets, which made + // the compute shader read garbage and emit constant-Y output — + // visible as a green-screen stream.) // - // Layout: - // Offset binding0_offset: Sampler + image view capture payloads (binding 0) - // Offset binding1_offset: Buffer capture payload (binding 1) - - let sampler_offset = self.binding0_offset as usize; - let image_offset = sampler_offset + self.sampler_capture_size as usize; - let buffer_offset = self.binding1_offset as usize; - - // Write sampler capture payload. - std::ptr::copy_nonoverlapping( - self.sampler_data.as_ptr(), - self.descriptor_buffer_ptr.add(sampler_offset), - self.sampler_capture_size as usize, - ); - - // Write image view capture payload. - std::ptr::copy_nonoverlapping( - self.image_data.as_ptr(), - self.descriptor_buffer_ptr.add(image_offset), - self.image_capture_size as usize, + // The descriptor buffer is persistent-mapped HOST_COHERENT, so a + // plain memcpy via the slice handed to `get_descriptor` is enough. + + // Binding 0: COMBINED_IMAGE_SAMPLER (sampler + image view). + let image_info = vk::DescriptorImageInfo::default() + .sampler(self.sampler) + .image_view(src_view) + .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL); + let combined_get_info = vk::DescriptorGetInfoEXT::default() + .ty(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) + .data(vk::DescriptorDataEXT { + p_combined_image_sampler: &image_info, + }); + let combined_dst = std::slice::from_raw_parts_mut( + self.descriptor_buffer_ptr + .add(self.binding0_offset as usize), + self.combined_image_sampler_descriptor_size, ); - - // Write buffer capture payload. - std::ptr::copy_nonoverlapping( - self.buffer_data.as_ptr(), - self.descriptor_buffer_ptr.add(buffer_offset), - self.buffer_capture_size as usize, + self.ext_device + .get_descriptor(&combined_get_info, combined_dst); + + // Binding 1: STORAGE_BUFFER (output YUV). + let buffer_addr_info = vk::DescriptorAddressInfoEXT::default() + .address(self.output_buffer_address) + .range(self.output_buffer_size as u64) + .format(vk::Format::UNDEFINED); + let storage_get_info = vk::DescriptorGetInfoEXT::default() + .ty(vk::DescriptorType::STORAGE_BUFFER) + .data(vk::DescriptorDataEXT { + p_storage_buffer: &buffer_addr_info, + }); + let storage_dst = std::slice::from_raw_parts_mut( + self.descriptor_buffer_ptr + .add(self.binding1_offset as usize), + self.storage_buffer_descriptor_size, ); + self.ext_device + .get_descriptor(&storage_get_info, storage_dst); // --- Bind descriptor buffers --- let binding_info = vk::DescriptorBufferBindingInfoEXT::default() diff --git a/src/converter/pipeline.rs b/src/converter/pipeline.rs index 7b3956d..e6e6a82 100644 --- a/src/converter/pipeline.rs +++ b/src/converter/pipeline.rs @@ -99,18 +99,36 @@ pub fn create_converter( let sampler = unsafe { device.create_sampler(&sampler_info, None) } .map_err(|e| PixelForgeError::ResourceCreation(format!("sampler creation: {}", e)))?; - // Create output buffer (device local for compute shader output, transfer source for image copy) + // Create output buffer (device local for compute shader output, transfer + // source for image copy). Needs SHADER_DEVICE_ADDRESS so we can query a + // device address to feed `VkDescriptorAddressInfoEXT` when populating + // the storage-buffer descriptor at runtime. let (output_buffer, output_memory) = create_buffer( device, context.memory_properties(), output_size as vk::DeviceSize, vk::BufferUsageFlags::STORAGE_BUFFER | vk::BufferUsageFlags::TRANSFER_SRC - | vk::BufferUsageFlags::TRANSFER_DST, + | vk::BufferUsageFlags::TRANSFER_DST + | vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS, vk::MemoryPropertyFlags::DEVICE_LOCAL, )?; - // Query descriptor buffer properties to determine correct capture sizes. + // Resolve the output buffer's device address. This goes into the + // `VkDescriptorAddressInfoEXT` we pass to `vkGetDescriptorEXT` when + // populating the storage-buffer descriptor each frame. + let output_buffer_address = unsafe { + device.get_buffer_device_address( + &vk::BufferDeviceAddressInfo::default().buffer(output_buffer), + ) + }; + + // Query descriptor buffer properties to determine correct descriptor sizes. + // The bug in the previous implementation was using the *capture-replay* + // sizes (e.g. `sampler_capture_replay_descriptor_data_size`) — those are + // for capture/replay tooling like RenderDoc and have nothing to do with + // runtime descriptor population. The correct sizes for in-buffer + // descriptors are the regular `*_descriptor_size` fields. let mut db_props = vk::PhysicalDeviceDescriptorBufferPropertiesEXT::default(); let mut props = vk::PhysicalDeviceProperties2 { p_next: &mut db_props as *mut _ as *mut _, @@ -119,9 +137,8 @@ pub fn create_converter( unsafe { instance.get_physical_device_properties2(physical_device, &mut props); } - let sampler_cap_size = db_props.sampler_capture_replay_descriptor_data_size; - let image_cap_size = db_props.image_view_capture_replay_descriptor_data_size; - let buffer_cap_size = db_props.buffer_capture_replay_descriptor_data_size; + let combined_image_sampler_size = db_props.combined_image_sampler_descriptor_size; + let storage_buffer_size_descriptor = db_props.storage_buffer_descriptor_size; // Query descriptor set layout size and binding offsets for correct buffer sizing. let ext_device = @@ -224,6 +241,8 @@ pub fn create_converter( cached_src_view: None, output_buffer, output_memory, + output_buffer_size: output_size, + output_buffer_address, command_pool, command_buffer, fence, @@ -234,14 +253,9 @@ pub fn create_converter( descriptor_buffer_usage: vk::BufferUsageFlags::SHADER_DEVICE_ADDRESS | vk::BufferUsageFlags::RESOURCE_DESCRIPTOR_BUFFER_EXT, descriptor_buffer_ptr, - sampler_capture_size: sampler_cap_size as u32, - image_capture_size: image_cap_size as u32, - buffer_capture_size: buffer_cap_size as u32, - // Cached descriptor buffer device and capture buffers. + combined_image_sampler_descriptor_size: combined_image_sampler_size, + storage_buffer_descriptor_size: storage_buffer_size_descriptor, ext_device, - sampler_data: vec![0u8; sampler_cap_size], - image_data: vec![0u8; image_cap_size], - buffer_data: vec![0u8; buffer_cap_size], // Layout info for correct offset computation. binding0_offset, binding1_offset,