From 12a4751b486b651a0a0a6e67068c74228a280b25 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Mon, 20 Apr 2026 17:44:53 -0400 Subject: [PATCH 01/16] =?UTF-8?q?rtx:=20Rename=20premultiplyBackground=20?= =?UTF-8?q?=E2=86=92=20premultipliedAlpha=20and=20clarify=20semantics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames both the GPU struct field (RendererGPUData::premultiplyBackground) and the public ANARI parameter, and updates its description from "pre-multiply alpha channel with background color" to "pre-multiply RGB by alpha in the composited output pixel". Also exposes the parameter on the third renderer that previously lacked it. --- devices/rtx/device/gpu/gpu_objects.h | 2 +- .../rtx/device/gpu/renderer/raygen_helpers.h | 2 +- devices/rtx/device/renderer/Renderer.cpp | 4 ++-- devices/rtx/device/renderer/Renderer.h | 2 +- devices/rtx/device/visrtx_device.json | 17 +++++++++++++---- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/devices/rtx/device/gpu/gpu_objects.h b/devices/rtx/device/gpu/gpu_objects.h index 549daaadf..beb3e0292 100644 --- a/devices/rtx/device/gpu/gpu_objects.h +++ b/devices/rtx/device/gpu/gpu_objects.h @@ -715,7 +715,7 @@ struct RendererGPUData float inverseVolumeSamplingRate; float occlusionDistance; bool cullTriangleBF; - bool premultiplyBackground; + bool premultipliedAlpha; bool fireflyFilter; // enable internal tonemapping during sample accumulation glm::vec4 cutPlane; // cutting plane (nx,ny,nz,d); disabled when all zero (GPU // default) diff --git a/devices/rtx/device/gpu/renderer/raygen_helpers.h b/devices/rtx/device/gpu/renderer/raygen_helpers.h index 8069090f3..c3735d243 100644 --- a/devices/rtx/device/gpu/renderer/raygen_helpers.h +++ b/devices/rtx/device/gpu/renderer/raygen_helpers.h @@ -204,7 +204,7 @@ VISRTX_DEVICE void renderPixel(FrameGPUData &frameData, ScreenSample ss) // Accumulate background for remaining 
transparency const auto bg = getBackground(frameData, ss.screen, ray.dir); - const bool premultiplyBg = rendererParams.premultiplyBackground; + const bool premultiplyBg = rendererParams.premultipliedAlpha; vec3 bgColor = premultiplyBg ? vec3(bg) * bg.a : vec3(bg); accumulateValue(outputColor, bgColor, outputOpacity); diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp index 7b87c161e..971ecbcc4 100644 --- a/devices/rtx/device/renderer/Renderer.cpp +++ b/devices/rtx/device/renderer/Renderer.cpp @@ -170,7 +170,7 @@ void Renderer::commitParameters() m_cullTriangleBF = getParam("cullTriangleBackfaces", false); m_volumeSamplingRate = std::clamp(getParam("volumeSamplingRate", 0.125f), 1e-3f, 10.f); - m_premultiplyBackground = getParam("premultiplyBackground", false); + m_premultipliedAlpha = getParam("premultipliedAlpha", false); m_cutPlane = getParam("cutPlane", vec4(0.f)); if (m_checkerboard) m_spp = 1; @@ -211,7 +211,7 @@ void Renderer::populateFrameData(FrameGPUData &fd) const fd.renderer.fireflyFilter = m_fireflyFilter; fd.renderer.inverseVolumeSamplingRate = 1.f / m_volumeSamplingRate; fd.renderer.numIterations = std::max(m_spp, 1); - fd.renderer.premultiplyBackground = m_premultiplyBackground; + fd.renderer.premultipliedAlpha = m_premultipliedAlpha; fd.renderer.cutPlane = m_cutPlane; } diff --git a/devices/rtx/device/renderer/Renderer.h b/devices/rtx/device/renderer/Renderer.h index 07ea51883..219c2ab47 100644 --- a/devices/rtx/device/renderer/Renderer.h +++ b/devices/rtx/device/renderer/Renderer.h @@ -94,7 +94,7 @@ struct Renderer : public Object true}; // enable internal tonemapping during sample accumulation int m_sampleLimit{0}; bool m_cullTriangleBF{false}; - bool m_premultiplyBackground{false}; + bool m_premultipliedAlpha{false}; float m_volumeSamplingRate{1.f}; vec4 m_cutPlane{0.f}; diff --git a/devices/rtx/device/visrtx_device.json b/devices/rtx/device/visrtx_device.json index f7fcfe613..3c26c1287 100644 --- 
a/devices/rtx/device/visrtx_device.json +++ b/devices/rtx/device/visrtx_device.json @@ -105,13 +105,13 @@ "description": "suppress fireflies via reversible tonemapping before accumulation" }, { - "name": "premultiplyBackground", + "name": "premultipliedAlpha", "types": [ "ANARI_BOOL" ], "tags": [], "default": false, - "description": "pre-multiply alpha channel with background color" + "description": "pre-multiply RGB by alpha in the composited output pixel" }, { "name": "cullTriangleBackfaces", @@ -266,13 +266,13 @@ "description": "suppress fireflies via reversible tonemapping before accumulation" }, { - "name": "premultiplyBackground", + "name": "premultipliedAlpha", "types": [ "ANARI_BOOL" ], "tags": [], "default": false, - "description": "pre-multiply alpha channel with background color" + "description": "pre-multiply RGB by alpha in the composited output pixel" }, { "name": "cullTriangleBackfaces", @@ -406,6 +406,15 @@ "default": true, "description": "suppress fireflies via reversible tonemapping before accumulation" }, + { + "name": "premultipliedAlpha", + "types": [ + "ANARI_BOOL" + ], + "tags": [], + "default": false, + "description": "pre-multiply RGB by alpha in the composited output pixel" + }, { "name": "cullTriangleBackfaces", "types": [ From 31ba35b32dc789158b06b31267d59a8655f9ea75 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 16:44:46 -0400 Subject: [PATCH 02/16] rtx: Extract tonemap/inverseTonemap to gpu_tonemap.h for use from non-PTX CUDA sources --- devices/rtx/device/gpu/gpu_tonemap.h | 67 ++++++++++++++++++++++++++++ devices/rtx/device/gpu/gpu_util.h | 26 +---------- 2 files changed, 69 insertions(+), 24 deletions(-) create mode 100644 devices/rtx/device/gpu/gpu_tonemap.h diff --git a/devices/rtx/device/gpu/gpu_tonemap.h b/devices/rtx/device/gpu/gpu_tonemap.h new file mode 100644 index 000000000..25f876158 --- /dev/null +++ b/devices/rtx/device/gpu/gpu_tonemap.h @@ -0,0 +1,67 @@ +/* 
+ * Copyright (c) 2019-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +// Tonemap helpers — safe to include from both PTX and regular CUDA sources. +// gpu_util.h includes OptiX headers and cannot be used from Frame.cu; +// this header provides the subset needed by the compositing kernel. 
+#pragma once + +#include "gpu_math.h" +// glm +#include +#include +#include + +namespace visrtx { +namespace detail { + +VISRTX_DEVICE vec3 tonemap(vec3 v) +{ + return v / (1.0f + glm::max(0.0f, compMax(v))); +} + +VISRTX_DEVICE vec3 inverseTonemap(vec3 v) +{ + return v / glm::max(1e-12f, 1.f - compMax(v)); +} + +VISRTX_DEVICE vec4 tonemap(vec4 v) +{ + return vec4(tonemap(vec3(v)), v.w); +} + +VISRTX_DEVICE vec4 inverseTonemap(vec4 v) +{ + return vec4(inverseTonemap(vec3(v)), v.w); +} + +} // namespace detail +} // namespace visrtx diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index 9ea8b894e..8b02d709c 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ b/devices/rtx/device/gpu/gpu_util.h @@ -34,6 +34,8 @@ #include "cameraCreateRay.h" #include "gpu/gpu_debug.h" #include "gpu_objects.h" +#include "gpu_tonemap.h" + // optix #include // std @@ -350,30 +352,6 @@ VISRTX_DEVICE uint32_t computeGeometryPrimId(const SurfaceHit &hit) namespace detail { -VISRTX_DEVICE -vec3 tonemap(vec3 v) -{ - return v / (1.0f + max(0.0f, compMax(v))); -} - -VISRTX_DEVICE -vec3 inverseTonemap(vec3 v) -{ - return v / max(1e-12f, 1.f - compMax(v)); -} - -VISRTX_DEVICE -vec4 tonemap(vec4 v) -{ - return vec4(tonemap(vec3(v)), v.w); -} - -VISRTX_DEVICE -vec4 inverseTonemap(vec4 v) -{ - return vec4(inverseTonemap(vec3(v)), v.w); -} - template VISRTX_DEVICE void accumValue(T *arr, size_t idx, const T &v) { From 6803d81aacae0090e16f7ab6485425d809a49f44 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 16:54:34 -0400 Subject: [PATCH 03/16] rtx: Replace bg blend with HDRI-only accumulation in raygen_helpers --- devices/rtx/device/gpu/renderer/raygen_helpers.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/devices/rtx/device/gpu/renderer/raygen_helpers.h b/devices/rtx/device/gpu/renderer/raygen_helpers.h index c3735d243..b51eb3c26 100644 --- 
a/devices/rtx/device/gpu/renderer/raygen_helpers.h +++ b/devices/rtx/device/gpu/renderer/raygen_helpers.h @@ -202,13 +202,12 @@ VISRTX_DEVICE void renderPixel(FrameGPUData &frameData, ScreenSample ss) // Otherwise, continue through transparent surface } - // Accumulate background for remaining transparency - const auto bg = getBackground(frameData, ss.screen, ray.dir); - const bool premultiplyBg = rendererParams.premultipliedAlpha; - vec3 bgColor = premultiplyBg ? vec3(bg) * bg.a : vec3(bg); - - accumulateValue(outputColor, bgColor, outputOpacity); - accumulateValue(outputOpacity, bg.a, outputOpacity); + // Accumulate HDRI sky — marks sky pixels as opaque so the background + // compositing pass does not bleed through HDRI-covered pixels. + if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + accumulateValue(outputColor, hdri, outputOpacity); + accumulateValue(outputOpacity, 1.f, outputOpacity); + } // Write accumulated sample to framebuffer accumPixelSample(frameData, From e2e4e1816bed20a081b2cec1eaf61ee56305ac1a Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 16:54:34 -0400 Subject: [PATCH 04/16] rtx: Quality_ptx replace bg blend with HDRI-only accumulation Drops the unconditional getBackground/bg.a blend on path miss and replaces it with HDRI-only accumulation via getBackgroundLight, gated on !volumeSample.didScatter so a scattered volume sample doesn't re-add the sky. --- devices/rtx/device/renderer/Quality_ptx.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index e1a2b36f6..852533c36 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -143,9 +143,9 @@ VISRTX_DEVICE LightSample sampleLights(ScreenSample &ss, // curand_uniform returns (0,1], invert to get [0,numLights). 
// Clamp to handle float rounding when curand returns a subnormal. - const size_t selectedIdx = glm::min( - size_t((1.0f - curand_uniform(&ss.rs)) * float(numLights)), - numLights - 1); + const size_t selectedIdx = + glm::min(size_t((1.0f - curand_uniform(&ss.rs)) * float(numLights)), + numLights - 1); const float radianceWeight = float(numLights); @@ -399,10 +399,11 @@ VISRTX_GLOBAL void __raygen__() }; } - if (!surfaceHit.foundHit) { - const auto bg = getBackground(frameData, ss.screen, ray.dir); - sample.color += sampleContribution * vec3(bg) * bg.a; - accumulateValue(sample.opacity, bg.a, sample.opacity); + if (!surfaceHit.foundHit && !volumeSample.didScatter) { + if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + sample.color += sampleContribution * hdri; + accumulateValue(sample.opacity, 1.f, sample.opacity); + } if (isFirstBounce && !firstHitAssigned) { setPixelIds(frameData.fb, ss.pixel, ray.t.upper, ~0u, ~0u, ~0u); From adae2aa4aa2c658cffdcb8dc0c360003954aa2ad Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:07:43 -0400 Subject: [PATCH 05/16] rtx: replace getBackgroundImage with HDRI-only for compositing pass in Debug_ptx --- devices/rtx/device/renderer/Debug_ptx.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/devices/rtx/device/renderer/Debug_ptx.cu b/devices/rtx/device/renderer/Debug_ptx.cu index 8c6a1f2af..372e6ae13 100644 --- a/devices/rtx/device/renderer/Debug_ptx.cu +++ b/devices/rtx/device/renderer/Debug_ptx.cu @@ -237,7 +237,10 @@ VISRTX_GLOBAL void __raygen__() auto ray = makePrimaryRay(ss, true /*pixel centered*/); - auto color = vec3(getBackgroundImage(frameData.renderer, ss.screen)); + vec3 color{0.f}; + if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + color = hdri; + } auto depth = ray.t.upper; auto normal = ray.dir; uint32_t primID = ~0u; From 01047f6b0affcf2c2ba0f0910fa2214ac46c96a1 Mon Sep 17 00:00:00 2001 From: Thomas 
Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:19:25 -0400 Subject: [PATCH 06/16] rtx: extract format conversion from launch() into convertOutput() --- devices/rtx/device/frame/Denoiser.cu | 47 +++++++++++++++------------- devices/rtx/device/frame/Denoiser.h | 9 ++++-- devices/rtx/device/frame/Frame.cu | 4 ++- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/devices/rtx/device/frame/Denoiser.cu b/devices/rtx/device/frame/Denoiser.cu index 86668d807..807363f41 100644 --- a/devices/rtx/device/frame/Denoiser.cu +++ b/devices/rtx/device/frame/Denoiser.cu @@ -130,30 +130,33 @@ void Denoiser::launch() (CUdeviceptr)m_scratch.ptr(), static_cast(m_scratch.bytes()))); instrument::rangePop(); // optixDenoiserInvoke() +} - if (m_format != ANARI_FLOAT32_VEC4) { - instrument::rangePush("denoiser transform pixels"); - auto numPixels = - size_t(m_layer.output.width) * size_t(m_layer.output.height); - auto begin = thrust::device_ptr((vec4 *)m_pixelBuffer->dataDevice()); - auto end = begin + numPixels; - if (m_format == ANARI_UFIXED8_RGBA_SRGB) { - thrust::transform(thrust::cuda::par.on(state.stream), - begin, - end, - thrust::device_pointer_cast(m_uintPixels.dataDevice()), - [] __device__(const vec4 &in) { - return glm::packUnorm4x8(glm::convertLinearToSRGB(in)); - }); - } else { - thrust::transform(thrust::cuda::par.on(state.stream), - begin, - end, - thrust::device_pointer_cast(m_uintPixels.dataDevice()), - [] __device__(const vec4 &in) { return glm::packUnorm4x8(in); }); - } - instrument::rangePop(); // denoiser transform pixels +void Denoiser::convertOutput() +{ + if (m_format == ANARI_FLOAT32_VEC4) + return; + auto &state = *deviceState(); + instrument::rangePush("denoiser transform pixels"); + auto numPixels = size_t(m_layer.output.width) * size_t(m_layer.output.height); + auto begin = thrust::device_ptr((vec4 *)m_pixelBuffer->dataDevice()); + auto end = begin + numPixels; + if (m_format == ANARI_UFIXED8_RGBA_SRGB) { + 
thrust::transform(thrust::cuda::par.on(state.stream), + begin, + end, + thrust::device_pointer_cast(m_uintPixels.dataDevice()), + [] __device__(const vec4 &in) { + return glm::packUnorm4x8(glm::convertLinearToSRGB(in)); + }); + } else { + thrust::transform(thrust::cuda::par.on(state.stream), + begin, + end, + thrust::device_pointer_cast(m_uintPixels.dataDevice()), + [] __device__(const vec4 &in) { return glm::packUnorm4x8(in); }); } + instrument::rangePop(); // denoiser transform pixels } void *Denoiser::mapColorBuffer() diff --git a/devices/rtx/device/frame/Denoiser.h b/devices/rtx/device/frame/Denoiser.h index 541d9b2c9..10de73d6d 100644 --- a/devices/rtx/device/frame/Denoiser.h +++ b/devices/rtx/device/frame/Denoiser.h @@ -41,12 +41,15 @@ struct Denoiser : public Object Denoiser(DeviceGlobalState *s); ~Denoiser() override; - void setup( - uvec2 size, HostDeviceArray &pixelBuffer, ANARIDataType format, - DeviceBuffer &accumAlbedo, DeviceBuffer &accumNormal); + void setup(uvec2 size, + HostDeviceArray &pixelBuffer, + ANARIDataType format, + DeviceBuffer &accumAlbedo, + DeviceBuffer &accumNormal); void cleanup(); void launch(); + void convertOutput(); void *mapColorBuffer(); void *mapGPUColorBuffer(); diff --git a/devices/rtx/device/frame/Frame.cu b/devices/rtx/device/frame/Frame.cu index 2aa877979..794f9e15a 100644 --- a/devices/rtx/device/frame/Frame.cu +++ b/devices/rtx/device/frame/Frame.cu @@ -334,8 +334,10 @@ void Frame::renderFrame() else hd.fb.frameID += m_renderer->spp(); - if (m_denoise) + if (m_denoise) { m_denoiser.launch(); + m_denoiser.convertOutput(); + } if (m_callback) { cudaLaunchHostFunc( From 1cb151ca17aa2d658b4408e4a98a38e698417d98 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:45:06 -0400 Subject: [PATCH 07/16] rtx: add compositeBackground kernel and wire it into renderFrame() Introduces a CUDA kernel (compositeBackground) and its launcher that composite the rendered accumulator 
over the background and emit the final pixel format (FLOAT, UINT, or SRGB). Hooks it into both the denoise and non-denoise paths in Frame::renderFrame(). --- devices/rtx/device/frame/Frame.cu | 137 ++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/devices/rtx/device/frame/Frame.cu b/devices/rtx/device/frame/Frame.cu index 794f9e15a..51b9eda04 100644 --- a/devices/rtx/device/frame/Frame.cu +++ b/devices/rtx/device/frame/Frame.cu @@ -30,9 +30,12 @@ */ #include "Frame.h" +#include "gpu/createScreenSample.h" +#include "gpu/gpu_tonemap.h" #include "utility/instrument.h" // std #include +#include #include // thrust #include @@ -42,6 +45,113 @@ namespace visrtx { +namespace { + +__global__ void compositeBackground(vec4 *__restrict__ accumColor, + vec4 *__restrict__ pixelBuf, + uint32_t *__restrict__ uintBuf, + RendererGPUData renderer, + uvec2 size, + vec2 invSize, + FrameFormat format, + int frameID, + int checkerboardID, + bool isDenoised) +{ + const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size.x * size.y) + return; + + const uint32_t px = idx % size.x; + const uint32_t py = idx / size.x; + + uint32_t sourceIdx = idx; + int divisor = frameID; + if (checkerboardID >= 0 && checkerboardID < 3) { + const int pixTile = (px & 1) | ((py & 1) << 1); + if (pixTile <= checkerboardID) + divisor = frameID + 1; + else if (frameID == 0) { + const uint32_t sourcePx = px & ~1u; + const uint32_t sourcePy = py & ~1u; + sourceIdx = sourcePx + sourcePy * size.x; + divisor = 1; + } + } + if (divisor == 0) + return; + + vec4 rendered; + if (isDenoised) { + // The denoiser fills pixelBuf at every pixel, so reading from sourceIdx + // would race against another thread compositing into that same slot. + // Read RGB from this thread's own pixel; only the alpha needs the + // checkerboard source redirect because accumColor is sparse. 
+ rendered = pixelBuf[idx]; + rendered.a = accumColor[sourceIdx].a / float(divisor); + } else { + rendered = accumColor[sourceIdx] / float(divisor); + if (renderer.fireflyFilter) + rendered = detail::inverseTonemap(rendered); + } + + const vec2 uv = (vec2(px, py) + 0.5f) * invSize; + + vec4 bg; + if (renderer.backgroundMode == BackgroundMode::COLOR) { + bg = renderer.background.color; + } else { + const auto s = tex2D(renderer.background.texobj, uv.x, uv.y); + bg = vec4(s.x, s.y, s.z, s.w); + } + + vec3 rgb = vec3(rendered); + float alpha = rendered.a; + accumulateValue(rgb, vec3(bg) * bg.a, alpha); + accumulateValue(alpha, bg.a, alpha); + + if (!renderer.premultipliedAlpha && alpha > 0.0f) + rgb *= 1.0f / alpha; + + vec4 rgba = vec4(rgb, alpha); + if (format == FrameFormat::SRGB) { + uintBuf[idx] = glm::packUnorm4x8(glm::convertLinearToSRGB(rgba)); + } else if (format == FrameFormat::UINT) { + uintBuf[idx] = glm::packUnorm4x8(rgba); + } else { + pixelBuf[idx] = rgba; + } +} + +void launchCompositeBackground(vec4 *accumColor, + vec4 *pixelBuf, + uint32_t *uintBuf, + const RendererGPUData &renderer, + uvec2 size, + vec2 invSize, + FrameFormat format, + int frameID, + int checkerboardID, + bool isDenoised, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + compositeBackground<<>>(accumColor, + pixelBuf, + uintBuf, + renderer, + size, + invSize, + format, + frameID, + checkerboardID, + isDenoised); +} + +} // anonymous namespace + Frame::Frame(DeviceGlobalState *d) : helium::BaseFrame(d), m_denoiser(d) { cudaEventCreate(&m_eventStart); @@ -334,9 +444,36 @@ void Frame::renderFrame() else hd.fb.frameID += m_renderer->spp(); + const bool useFloatOutput = m_denoise || m_colorType == ANARI_FLOAT32_VEC4; + if (m_denoise) { m_denoiser.launch(); + + launchCompositeBackground(m_accumColor.ptrAs(), + (vec4 *)m_pixelBuffer.dataDevice(), + nullptr, + 
hd.renderer, + hd.fb.size, + hd.fb.invSize, + FrameFormat::FLOAT, + hd.fb.frameID, + hd.fb.checkerboardID, + /*isDenoised=*/true, + state.stream); + m_denoiser.convertOutput(); + } else { + launchCompositeBackground(m_accumColor.ptrAs(), + useFloatOutput ? (vec4 *)m_pixelBuffer.dataDevice() : nullptr, + useFloatOutput ? nullptr : (uint32_t *)m_pixelBuffer.dataDevice(), + hd.renderer, + hd.fb.size, + hd.fb.invSize, + hd.fb.format, + hd.fb.frameID, + hd.fb.checkerboardID, + /*isDenoised=*/false, + state.stream); } if (m_callback) { From f4863b5e4d8b2f5d56649731f37c7a0baae15bc1 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Fri, 24 Apr 2026 15:22:46 -0400 Subject: [PATCH 08/16] rtx: feed denoiser dense per-pixel albedo/normal/input estimates Sparse accumulator writes under checkerboarding caused flicker; the 2x2 dilation hack papered over gaps instead of fixing them. Two new post-launch kernels (prepareDenoiseInput, prepareDenoiseGuides) cover every pixel using a resolveSample helper that redirects non-rendered checker tiles to their source accumulator. Side effects: accumPixelSample simplifies to pure accumulation (no output-buffer writes, no frameIDOffset); writeOutputColor is deleted; outColorVec4, outColorUint, and FrameFormat are removed from FramebufferGPUData. 
--- devices/rtx/device/frame/Denoiser.cu | 35 +-- devices/rtx/device/frame/Denoiser.h | 7 +- devices/rtx/device/frame/Frame.cu | 205 +++++++++++++++--- devices/rtx/device/frame/Frame.h | 9 +- devices/rtx/device/gpu/gpu_objects.h | 3 - devices/rtx/device/gpu/gpu_util.h | 54 +---- .../rtx/device/gpu/renderer/raygen_helpers.h | 3 +- devices/rtx/device/renderer/Quality_ptx.cu | 11 +- 8 files changed, 215 insertions(+), 112 deletions(-) diff --git a/devices/rtx/device/frame/Denoiser.cu b/devices/rtx/device/frame/Denoiser.cu index 807363f41..328ca30ef 100644 --- a/devices/rtx/device/frame/Denoiser.cu +++ b/devices/rtx/device/frame/Denoiser.cu @@ -50,15 +50,16 @@ Denoiser::~Denoiser() } void Denoiser::setup(uvec2 size, - HostDeviceArray &pixelBuffer, + HostDeviceArray &outputBuffer, ANARIDataType format, - DeviceBuffer &accumAlbedo, - DeviceBuffer &accumNormal) + DeviceBuffer &input, + DeviceBuffer &albedo, + DeviceBuffer &normal) { - init(accumAlbedo, accumNormal); + init(albedo, normal); auto &state = *deviceState(); - m_pixelBuffer = &pixelBuffer; + m_pixelBuffer = &outputBuffer; m_format = format; @@ -83,22 +84,24 @@ void Denoiser::setup(uvec2 size, (CUdeviceptr)m_scratch.ptr(), m_scratch.bytes())); - m_layer.input.data = (CUdeviceptr)pixelBuffer.dataDevice(); + m_layer.input.data = (CUdeviceptr)input.ptr(); m_layer.input.width = size.x; m_layer.input.height = size.y; m_layer.input.pixelStrideInBytes = 0; m_layer.input.rowStrideInBytes = 4 * sizeof(float) * size.x; m_layer.input.format = OPTIX_PIXEL_FORMAT_FLOAT4; - std::memcpy(&m_layer.output, &m_layer.input, sizeof(m_layer.output)); - m_guideLayer.albedo.data = (CUdeviceptr)accumAlbedo.ptr(); + m_layer.output = m_layer.input; + m_layer.output.data = (CUdeviceptr)outputBuffer.dataDevice(); + + m_guideLayer.albedo.data = (CUdeviceptr)albedo.ptr(); m_guideLayer.albedo.width = size.x; m_guideLayer.albedo.height = size.y; m_guideLayer.albedo.pixelStrideInBytes = 3 * sizeof(float); m_guideLayer.albedo.rowStrideInBytes = 3 
* sizeof(float) * size.x; m_guideLayer.albedo.format = OPTIX_PIXEL_FORMAT_FLOAT3; - m_guideLayer.normal.data = (CUdeviceptr)accumNormal.ptr(); + m_guideLayer.normal.data = (CUdeviceptr)normal.ptr(); m_guideLayer.normal.width = size.x; m_guideLayer.normal.height = size.y; m_guideLayer.normal.pixelStrideInBytes = 3 * sizeof(float); @@ -188,7 +191,6 @@ void Denoiser::init( m_denoiser = {}; } - auto &state = *deviceState(); m_usingAlbedo = useAlbedo; m_usingNormal = useNormal; @@ -196,10 +198,13 @@ void Denoiser::init( options.guideAlbedo = m_usingAlbedo; options.guideNormal = m_usingNormal; - OPTIX_CHECK(optixDenoiserCreate(state.optixContext, - OPTIX_DENOISER_MODEL_KIND_AOV, - &options, - &m_denoiser)); + if (!m_denoiser) { + auto &state = *deviceState(); + OPTIX_CHECK(optixDenoiserCreate(state.optixContext, + OPTIX_DENOISER_MODEL_KIND_AOV, + &options, + &m_denoiser)); + } } -} // namespace visrtx \ No newline at end of file +} // namespace visrtx diff --git a/devices/rtx/device/frame/Denoiser.h b/devices/rtx/device/frame/Denoiser.h index 10de73d6d..2196ebd10 100644 --- a/devices/rtx/device/frame/Denoiser.h +++ b/devices/rtx/device/frame/Denoiser.h @@ -42,10 +42,11 @@ struct Denoiser : public Object ~Denoiser() override; void setup(uvec2 size, - HostDeviceArray &pixelBuffer, + HostDeviceArray &outputBuffer, ANARIDataType format, - DeviceBuffer &accumAlbedo, - DeviceBuffer &accumNormal); + DeviceBuffer &input, + DeviceBuffer &albedo, + DeviceBuffer &normal); void cleanup(); void launch(); diff --git a/devices/rtx/device/frame/Frame.cu b/devices/rtx/device/frame/Frame.cu index 51b9eda04..71a2ba9ae 100644 --- a/devices/rtx/device/frame/Frame.cu +++ b/devices/rtx/device/frame/Frame.cu @@ -30,8 +30,8 @@ */ #include "Frame.h" -#include "gpu/createScreenSample.h" #include "gpu/gpu_tonemap.h" +#include "gpu/gpu_util.h" #include "utility/instrument.h" // std #include @@ -47,6 +47,126 @@ namespace visrtx { namespace { +// Resolve per-pixel (sourceIdx, divisor) for the current 
sub-frame. Mirrors +// compositeBackground so both kernels agree on which accumulator sample count +// and source pixel to read under checkerboarding. +__device__ bool resolveSample(uint32_t idx, + uvec2 size, + int frameID, + int checkerboardID, + uint32_t &sourceIdx, + int &divisor) +{ + sourceIdx = idx; + divisor = frameID; + if (checkerboardID >= 0 && checkerboardID < 3) { + const uint32_t px = idx % size.x; + const uint32_t py = idx / size.x; + const int pixTile = (px & 1) | ((py & 1) << 1); + if (pixTile <= checkerboardID) { + divisor = frameID + 1; + } else if (frameID == 0) { + sourceIdx = (px & ~1u) + (py & ~1u) * size.x; + divisor = 1; + } + } + return divisor > 0; +} + +__global__ void prepareDenoiseInput(const vec4 *__restrict__ accumColor, + vec4 *__restrict__ denoiseInput, + uvec2 size, + int frameID, + int checkerboardID, + bool fireflyFilter) +{ + const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size.x * size.y) + return; + + uint32_t srcIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, srcIdx, divisor)) { + denoiseInput[idx] = vec4(0.f); + return; + } + + vec4 c = accumColor[srcIdx] / float(divisor); + if (fireflyFilter) + c = detail::inverseTonemap(c); + denoiseInput[idx] = c; +} + +void launchPrepareDenoiseInput(const vec4 *accumColor, + vec4 *denoiseInput, + uvec2 size, + int frameID, + int checkerboardID, + bool fireflyFilter, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + prepareDenoiseInput<<>>( + accumColor, denoiseInput, size, frameID, checkerboardID, fireflyFilter); +} + +__global__ void prepareDenoiseGuides(const vec3 *__restrict__ accumAlbedo, + const vec3 *__restrict__ accumNormal, + vec3 *__restrict__ denoiseAlbedo, + vec3 *__restrict__ denoiseNormal, + uvec2 size, + int frameID, + int checkerboardID) +{ + const uint32_t idx = blockIdx.x * blockDim.x + 
threadIdx.x; + if (idx >= size.x * size.y) + return; + + uint32_t srcIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, srcIdx, divisor)) { + if (denoiseAlbedo) + denoiseAlbedo[idx] = vec3(0.f); + if (denoiseNormal) + denoiseNormal[idx] = vec3(0.f); + return; + } + + const float invDivisor = 1.0f / float(divisor); + if (denoiseAlbedo) + denoiseAlbedo[idx] = accumAlbedo[srcIdx] * invDivisor; + + if (denoiseNormal) { + const vec3 n = accumNormal[srcIdx]; + const float len = glm::length(n); + constexpr float NORMAL_EPSILON = 1e-6f; + denoiseNormal[idx] = len > NORMAL_EPSILON ? n * (1.0f / len) : vec3(0.f); + } +} + +void launchPrepareDenoiseGuides(const vec3 *accumAlbedo, + const vec3 *accumNormal, + vec3 *denoiseAlbedo, + vec3 *denoiseNormal, + uvec2 size, + int frameID, + int checkerboardID, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + prepareDenoiseGuides<<>>(accumAlbedo, + accumNormal, + denoiseAlbedo, + denoiseNormal, + size, + frameID, + checkerboardID); +} + __global__ void compositeBackground(vec4 *__restrict__ accumColor, vec4 *__restrict__ pixelBuf, uint32_t *__restrict__ uintBuf, @@ -62,25 +182,14 @@ __global__ void compositeBackground(vec4 *__restrict__ accumColor, if (idx >= size.x * size.y) return; + uint32_t sourceIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, sourceIdx, divisor)) + return; + const uint32_t px = idx % size.x; const uint32_t py = idx / size.x; - uint32_t sourceIdx = idx; - int divisor = frameID; - if (checkerboardID >= 0 && checkerboardID < 3) { - const int pixTile = (px & 1) | ((py & 1) << 1); - if (pixTile <= checkerboardID) - divisor = frameID + 1; - else if (frameID == 0) { - const uint32_t sourcePx = px & ~1u; - const uint32_t sourcePy = py & ~1u; - sourceIdx = sourcePx + sourcePy * size.x; - divisor = 1; - } - } - if (divisor == 0) - return; - vec4 
rendered; if (isDenoised) { // The denoiser fills pixelBuf at every pixel, so reading from sourceIdx @@ -221,12 +330,6 @@ void Frame::finalize() auto &hd = data(); const bool useFloatFB = m_denoise || m_colorType == ANARI_FLOAT32_VEC4; - if (useFloatFB) - hd.fb.format = FrameFormat::FLOAT; - else if (m_colorType == ANARI_UFIXED8_RGBA_SRGB) - hd.fb.format = FrameFormat::SRGB; - else - hd.fb.format = FrameFormat::UINT; hd.fb.invSize = 1.f / vec2(hd.fb.size); @@ -263,15 +366,23 @@ void Frame::finalize() else m_accumNormal.reset(); - hd.fb.buffers.colorAccumulation = m_accumColor.ptrAs(); - - hd.fb.buffers.outColorVec4 = nullptr; - hd.fb.buffers.outColorUint = nullptr; + if (m_denoise) { + m_denoiseInput.reserve(numPixels() * sizeof(vec4)); + if (m_denoiseUsingAlbedo) + m_denoiseAlbedo.reserve(numPixels() * sizeof(vec3)); + else + m_denoiseAlbedo.reset(); + if (m_denoiseUsingNormal) + m_denoiseNormal.reserve(numPixels() * sizeof(vec3)); + else + m_denoiseNormal.reset(); + } else { + m_denoiseInput.reset(); + m_denoiseAlbedo.reset(); + m_denoiseNormal.reset(); + } - if (useFloatFB) - hd.fb.buffers.outColorVec4 = (vec4 *)m_pixelBuffer.dataDevice(); - else - hd.fb.buffers.outColorUint = (uint32_t *)m_pixelBuffer.dataDevice(); + hd.fb.buffers.colorAccumulation = m_accumColor.ptrAs(); hd.fb.buffers.depth = channelDepth ? m_depthBuffer.dataDevice() : nullptr; hd.fb.buffers.primID = channelPrimID ? m_primIDBuffer.dataDevice() : nullptr; @@ -281,8 +392,12 @@ void Frame::finalize() hd.fb.buffers.normal = channelNormal ? 
m_accumNormal.ptrAs() : nullptr; if (m_denoise) - m_denoiser.setup( - hd.fb.size, m_pixelBuffer, m_colorType, m_accumAlbedo, m_accumNormal); + m_denoiser.setup(hd.fb.size, + m_pixelBuffer, + m_colorType, + m_denoiseInput, + m_denoiseAlbedo, + m_denoiseNormal); else m_denoiser.cleanup(); @@ -447,6 +562,25 @@ void Frame::renderFrame() const bool useFloatOutput = m_denoise || m_colorType == ANARI_FLOAT32_VEC4; if (m_denoise) { + launchPrepareDenoiseInput(m_accumColor.ptrAs(), + m_denoiseInput.ptrAs(), + hd.fb.size, + hd.fb.frameID, + hd.fb.checkerboardID, + hd.renderer.fireflyFilter, + state.stream); + + if (m_denoiseUsingAlbedo || m_denoiseUsingNormal) { + launchPrepareDenoiseGuides(m_accumAlbedo.ptrAs(), + m_accumNormal.ptrAs(), + m_denoiseAlbedo.ptrAs(), + m_denoiseNormal.ptrAs(), + hd.fb.size, + hd.fb.frameID, + hd.fb.checkerboardID, + state.stream); + } + m_denoiser.launch(); launchCompositeBackground(m_accumColor.ptrAs(), @@ -463,13 +597,16 @@ void Frame::renderFrame() m_denoiser.convertOutput(); } else { + const FrameFormat outFormat = useFloatOutput ? FrameFormat::FLOAT + : m_colorType == ANARI_UFIXED8_RGBA_SRGB ? FrameFormat::SRGB + : FrameFormat::UINT; launchCompositeBackground(m_accumColor.ptrAs(), useFloatOutput ? (vec4 *)m_pixelBuffer.dataDevice() : nullptr, useFloatOutput ? 
nullptr : (uint32_t *)m_pixelBuffer.dataDevice(), hd.renderer, hd.fb.size, hd.fb.invSize, - hd.fb.format, + outFormat, hd.fb.frameID, hd.fb.checkerboardID, /*isDenoised=*/false, diff --git a/devices/rtx/device/frame/Frame.h b/devices/rtx/device/frame/Frame.h index 6acbbc4d0..ba8236cde 100644 --- a/devices/rtx/device/frame/Frame.h +++ b/devices/rtx/device/frame/Frame.h @@ -35,8 +35,8 @@ #include "camera/Camera.h" #include "gpu/gpu_objects.h" #include "renderer/Renderer.h" -#include "world/World.h" #include "utility/DeviceObject.h" +#include "world/World.h" // helium #include "helium/BaseFrame.h" // std @@ -122,6 +122,13 @@ struct Frame : public helium::BaseFrame, public DeviceObject DeviceBuffer m_accumAlbedo; // vec3 DeviceBuffer m_accumNormal; // vec3 + // Per-pixel pre-denoise estimates. Keeping these separate from pixelBuffer + // avoids the denoiser reading its own previous output on non-rendered + // checkerboard pixels (which causes a period-4 flicker at edges). + DeviceBuffer m_denoiseInput; // vec4 + DeviceBuffer m_denoiseAlbedo; // vec3 + DeviceBuffer m_denoiseNormal; // vec3 + helium::IntrusivePtr m_renderer; helium::IntrusivePtr m_camera; helium::IntrusivePtr m_world; diff --git a/devices/rtx/device/gpu/gpu_objects.h b/devices/rtx/device/gpu/gpu_objects.h index beb3e0292..ee0d2a6b5 100644 --- a/devices/rtx/device/gpu/gpu_objects.h +++ b/devices/rtx/device/gpu/gpu_objects.h @@ -734,8 +734,6 @@ enum class FrameFormat struct FrameBuffers { glm::vec4 *colorAccumulation; - glm::vec4 *outColorVec4; - uint32_t *outColorUint; float *depth; uint32_t *primID; uint32_t *objID; @@ -750,7 +748,6 @@ struct FramebufferGPUData int frameID; int checkerboardID; float invFrameID; - FrameFormat format; glm::uvec2 size; glm::vec2 invSize; }; diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index 8b02d709c..106fca0e2 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ b/devices/rtx/device/gpu/gpu_util.h @@ -380,18 +380,6 @@ VISRTX_DEVICE uint32_t 
pixelIndex( return pixel.x + pixel.y * fb.size.x; } -VISRTX_DEVICE void writeOutputColor( - const FramebufferGPUData &fb, const vec4 &color, const uint32_t idx) -{ - if (fb.format == FrameFormat::SRGB) { - fb.buffers.outColorUint[idx] = - glm::packUnorm4x8(glm::convertLinearToSRGB(color)); - } else if (fb.format == FrameFormat::UINT) - fb.buffers.outColorUint[idx] = glm::packUnorm4x8(color); - else - fb.buffers.outColorVec4[idx] = color; -} - } // namespace detail VISRTX_DEVICE void setPixelIds(const FramebufferGPUData &fb, @@ -432,48 +420,16 @@ VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame, const uvec2 &pixel, const vec4 &color, const vec3 &albedo, - const vec3 &normal, - const int frameIDOffset = 0) + const vec3 &normal) { const auto &fb = frame.fb; const uint32_t idx = detail::pixelIndex(fb, pixel); - const auto frameID = fb.frameID + frameIDOffset; - - // Conditionally apply tonemapping during accumulation - if (frame.renderer.fireflyFilter) - detail::accumValue( - fb.buffers.colorAccumulation, idx, detail::tonemap(color)); - else - detail::accumValue(fb.buffers.colorAccumulation, idx, color); + + detail::accumValue(fb.buffers.colorAccumulation, + idx, + frame.renderer.fireflyFilter ? detail::tonemap(color) : color); detail::accumValue(fb.buffers.albedo, idx, albedo); detail::accumValue(fb.buffers.normal, idx, normal); - - const auto accumColor = fb.buffers.colorAccumulation[idx]; - // Conditionally apply inverse tonemapping on output - const float frameDivisor = float(fb.frameID + frameIDOffset + 1); - const auto normalizedColor = accumColor / frameDivisor; - const auto outputColor = frame.renderer.fireflyFilter - ? 
detail::inverseTonemap(normalizedColor) - : normalizedColor; - - detail::writeOutputColor(fb, outputColor, idx); - - if (fb.checkerboardID == 0 && frameID == 0) { - auto adjPix = uvec2(pixel.x + 1, pixel.y + 0); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - - adjPix = uvec2(pixel.x + 0, pixel.y + 1); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - - adjPix = uvec2(pixel.x + 1, pixel.y + 1); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - } } } // namespace visrtx diff --git a/devices/rtx/device/gpu/renderer/raygen_helpers.h b/devices/rtx/device/gpu/renderer/raygen_helpers.h index b51eb3c26..a597c7c72 100644 --- a/devices/rtx/device/gpu/renderer/raygen_helpers.h +++ b/devices/rtx/device/gpu/renderer/raygen_helpers.h @@ -214,8 +214,7 @@ VISRTX_DEVICE void renderPixel(FrameGPUData &frameData, ScreenSample ss) ss.pixel, vec4(outputColor, outputOpacity), outputAlbedo, - outputNormal, - i); + outputNormal); } } diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index 852533c36..692a3ab97 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -77,15 +77,13 @@ struct SampleDetails VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame, const uvec2 &pixel, - const SampleDetails &sample, - const int frameIDOffset = 0) + const SampleDetails &sample) { accumPixelSample(frame, pixel, vec4(sample.color, sample.opacity), sample.albedo, - sample.normal, - frameIDOffset); + sample.normal); } VISRTX_DEVICE vec3 surfaceAttenuation(ScreenSample &ss, Ray r) @@ -387,6 +385,9 @@ VISRTX_GLOBAL void __raygen__() auto nextRay = materialNextRay(shadingState, ray, ss.rs); sampleContribution *= nextRay.contributionWeight; + if (!continuesThroughSurface(nextRay)) + 
accumulateValue(sample.opacity, 1.0f, sample.opacity); + if (shouldTerminatePath(ss, d, sampleContribution, true)) break; @@ -413,7 +414,7 @@ VISRTX_GLOBAL void __raygen__() } } - accumPixelSample(frameData, ss.pixel, sample, i); + accumPixelSample(frameData, ss.pixel, sample); } } From e1e4c7afcf9c5134f8f2d23c8658bfd3a3047f0d Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:51:55 -0400 Subject: [PATCH 09/16] rtx: delete dead getBackground() and getBackgroundImage() --- devices/rtx/device/gpu/gpu_util.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index 106fca0e2..bcc384afb 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ b/devices/rtx/device/gpu/gpu_util.h @@ -295,14 +295,6 @@ VISRTX_DEVICE vec3 sampleHDRI(const LightGPUData &ld, const vec3 &rayDir) return sampleHDRI(ld, vec2(u, v)) * ld.hdri.scale; } -VISRTX_DEVICE vec4 getBackgroundImage( - const RendererGPUData &rd, const vec2 &loc) -{ - return rd.backgroundMode == BackgroundMode::COLOR - ? 
rd.background.color - : make_vec4(tex2D<::float4>(rd.background.texobj, loc.x, loc.y)); -} - VISRTX_DEVICE bool getBackgroundLight( const FrameGPUData &fd, const vec3 &rayDir, vec3 &outRadiance) { @@ -326,18 +318,6 @@ VISRTX_DEVICE bool getBackgroundLight( return hasVisibleHDRI; } -VISRTX_DEVICE vec4 getBackground( - const FrameGPUData &fd, const vec2 &loc, const vec3 &rayDir) -{ - vec3 hdriContribution; - const bool hasVisibleHDRI = getBackgroundLight(fd, rayDir, hdriContribution); - if (hasVisibleHDRI) - return vec4(hdriContribution, 1.f); - - // No visible HDRI, use background image/color - return getBackgroundImage(fd.renderer, loc); -} - VISRTX_DEVICE uint32_t computeGeometryPrimId(const SurfaceHit &hit) { if (!hit.foundHit) From 633998030c136a799f8958051af0b42f6bb6cbdd Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:53:05 -0400 Subject: [PATCH 10/16] rtx: Drop redundant firstHitAssigned Each iteration of the depth loop runs at most one of the volume-scatter, surface-hit, or no-hit branches, so on the first bounce the !firstHitAssigned check is always true. The flag and its assignments can go. 
--- devices/rtx/device/renderer/Quality_ptx.cu | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index 692a3ab97..e65d650ee 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -269,7 +269,6 @@ VISRTX_GLOBAL void __raygen__() vec3(0.0f), 0.0f, vec3(0.0f), ray.t.upper, vec3(0.0f)}; auto sampleContribution = vec3(1.0f); - bool firstHitAssigned = false; for (int d = 0; d < qualityParams.maxRayDepth; ++d) { const bool isFirstBounce = d == 0; @@ -315,7 +314,7 @@ VISRTX_GLOBAL void __raygen__() if (shouldTerminatePath(ss, d, sampleContribution, true)) break; - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, volumeSample.depth, @@ -328,7 +327,6 @@ VISRTX_GLOBAL void __raygen__() ? volumeSample.normal : -ray.dir; sample.normal = volumeNormal; - firstHitAssigned = true; } const vec3 scatterDir = randomDir(ss.rs); @@ -346,7 +344,7 @@ VISRTX_GLOBAL void __raygen__() const vec3 materialTint = materialEvaluateTint(shadingState); const float materialOpacity = materialEvaluateOpacity(shadingState); - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, surfaceHit.t, @@ -356,7 +354,6 @@ VISRTX_GLOBAL void __raygen__() sample.depth = surfaceHit.t; sample.normal = materialEvaluateNormal(shadingState); sample.albedo = materialTint; - firstHitAssigned = true; } sample.color += sampleContribution * materialEmission * materialOpacity; @@ -406,7 +403,7 @@ VISRTX_GLOBAL void __raygen__() accumulateValue(sample.opacity, 1.f, sample.opacity); } - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, ray.t.upper, ~0u, ~0u, ~0u); } From 82951784906977e83a558c31337fabe741ce1d07 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Wed, 22 Apr 
2026 20:13:57 -0400 Subject: [PATCH 11/16] rtx: Make sure Matte honours opacity with Quality renderer --- devices/rtx/device/material/shaders/MatteShader_ptx.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/devices/rtx/device/material/shaders/MatteShader_ptx.cu b/devices/rtx/device/material/shaders/MatteShader_ptx.cu index ae0d34375..7766c0b3c 100644 --- a/devices/rtx/device/material/shaders/MatteShader_ptx.cu +++ b/devices/rtx/device/material/shaders/MatteShader_ptx.cu @@ -53,6 +53,11 @@ VISRTX_CALLABLE void __direct_callable__init(MatteShadingState *shadingState, VISRTX_CALLABLE NextRay __direct_callable__nextRay( const MatteShadingState *shadingState, const Ray *ray, RandState *rs) { + // Before anything, check for opacity. If below, then we just pass through + if (curand_uniform(rs) > shadingState->opacity) { + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; + } + return NextRay{vec3(0.0f, 0.0f, 0.0f), vec3(0.0f, 0.0f, 0.0f)}; } From c004649876a413b2dca28b0de554864a7f3bc80e Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Wed, 22 Apr 2026 21:03:52 -0400 Subject: [PATCH 12/16] rtx: Better handling of transmission to alpha in the final frame Helps rendering a transparent scene over an image background. Adds a NEXT_RAY_CONTINUES_THROUGH_SURFACE flag and a continuesThroughSurface(NextRay) helper so callers (Quality_ptx, PhysicallyBasedShader, MatteShader) can distinguish a real material sample from an alpha-driven pass-through. NextRay default values collapse from vec4 to vec3 to drop a stale alpha lane. The per-hit accumulateValue(sample.opacity, materialOpacity, ...) on the surface branch in Quality_ptx is removed; opacity accumulation is now driven by whether the chosen next ray continues through the surface. 
--- devices/rtx/device/gpu/evalShading.h | 14 ++++++----- devices/rtx/device/gpu/gpu_util.h | 5 ++++ .../shaders/PhysicallyBasedShader_ptx.cu | 24 +++++++++---------- .../rtx/device/renderer/Interactive_ptx.cu | 9 ++++--- devices/rtx/device/renderer/Quality_ptx.cu | 2 -- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/devices/rtx/device/gpu/evalShading.h b/devices/rtx/device/gpu/evalShading.h index e9cd7f6b9..e02079884 100644 --- a/devices/rtx/device/gpu/evalShading.h +++ b/devices/rtx/device/gpu/evalShading.h @@ -81,14 +81,15 @@ VISRTX_DEVICE float materialEvaluateOpacity( } VISRTX_DEVICE vec3 materialEvaluateEmission( - const MaterialShadingState &shadingState, const vec3& outgoingDir) + const MaterialShadingState &shadingState, const vec3 &outgoingDir) { if (shadingState.callableBaseIndex == ~DeviceObjectIndex(0)) return vec3(0.0f, 0.0f, 0.0f); // Default emission color return optixDirectCall(shadingState.callableBaseIndex + int(SurfaceShaderEntryPoints::EvaluateEmission), - &shadingState.data, &outgoingDir); + &shadingState.data, + &outgoingDir); } VISRTX_DEVICE vec3 materialEvaluateTransmission( @@ -113,11 +114,12 @@ VISRTX_DEVICE vec3 materialEvaluateNormal( &shadingState.data); } -VISRTX_DEVICE NextRay materialNextRay(const MaterialShadingState &shadingState, - const Ray &ray, RandState& rs) +VISRTX_DEVICE NextRay materialNextRay( + const MaterialShadingState &shadingState, const Ray &ray, RandState &rs) { - if (shadingState.callableBaseIndex == ~DeviceObjectIndex(0)) // No next ray by defaut - return NextRay{vec4(0.0f), vec4(0.0f)}; + if (shadingState.callableBaseIndex + == ~DeviceObjectIndex(0)) // No next ray by default + return NextRay{vec3(0.0f), vec3(0.0f)}; return optixDirectCall(shadingState.callableBaseIndex + int(SurfaceShaderEntryPoints::EvaluateNextRay), diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index bcc384afb..ee269b9f8 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ 
b/devices/rtx/device/gpu/gpu_util.h @@ -275,6 +275,11 @@ VISRTX_DEVICE bool isMiddelPixel( return pixel.x == (fb.size.x / 2) && pixel.y == (fb.size.y / 2); } +VISRTX_DEVICE bool continuesThroughSurface(const NextRay &nextRay) +{ + return (nextRay.flags & NEXT_RAY_CONTINUES_THROUGH_SURFACE) != 0u; +} + VISRTX_DEVICE vec3 sampleHDRI(const LightGPUData &ld, const vec2 &uv) { return vec3(make_vec4(tex2D<::float4>(ld.hdri.radiance, uv.x, uv.y))); diff --git a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu index 7abbde0a4..80ec7da3b 100644 --- a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu +++ b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu @@ -166,13 +166,13 @@ VISRTX_CALLABLE vec3 __direct_callable__shadeSurface( + sqrtf(alpha * alpha + (1.f - alpha * alpha) * NdotV * NdotV))); const float denom = 4.f * fabsf(NdotV) * fabsf(NdotL); - const vec3 specularBRDF = - denom != 0.f ? (F * D * G) / denom : vec3(0.f); + const vec3 specularBRDF = denom != 0.f ? (F * D * G) / denom : vec3(0.f); // Transmission is applied only to the diffuse BRDF. This is intentional: // In this model, transmission reduces the diffuse reflection, while specular - // reflection (surface reflection) is not affected by transmission, as it represents - // light reflected at the surface rather than transmitted through the material. + // reflection (surface reflection) is not affected by transmission, as it + // represents light reflected at the surface rather than transmitted through + // the material. return (diffuseBRDF * (1.0f - shadingState->transmission) + specularBRDF) * NdotL * lightSample->radiance / lightSample->pdf; } @@ -184,9 +184,7 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay( { // Before anything, check for opacity. 
If below, then we just pass through if (curand_uniform(rs) > shadingState->opacity) - { return NextRay{ray->dir, vec3(1.0f)}; - } // Open cone, along the perfect reflection ray, with a metallic and // roughness-dependent angle @@ -198,16 +196,16 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay( bool isReflected = curand_uniform(rs) > transmission; auto nextVector = isReflected - ? glm::reflect(ray->dir, shadingState->normal) - : glm::refract(ray->dir, shadingState->normal, shadingState->ior); + ? glm::reflect(ray->dir, shadingState->normal) + : glm::refract(ray->dir, shadingState->normal, shadingState->ior); auto nextRay = computeOrthonormalBasis(normalize(nextVector)) - * uniformSampleCone(cosThetaMax, - vec3(curand_uniform(rs), curand_uniform(rs), curand_uniform(rs))); + * uniformSampleCone(cosThetaMax, + vec3(curand_uniform(rs), curand_uniform(rs), curand_uniform(rs))); auto nextSampleWeight = isReflected - ? shadingState->baseColor * metalness * (1.0f - transmission) - : shadingState->baseColor * transmission; + ? 
shadingState->baseColor * metalness * (1.0f - transmission) + : shadingState->baseColor * transmission; return NextRay{nextRay, nextSampleWeight}; -} \ No newline at end of file +} diff --git a/devices/rtx/device/renderer/Interactive_ptx.cu b/devices/rtx/device/renderer/Interactive_ptx.cu index 9678c7c8d..aebcaf304 100644 --- a/devices/rtx/device/renderer/Interactive_ptx.cu +++ b/devices/rtx/device/renderer/Interactive_ptx.cu @@ -150,14 +150,13 @@ struct InteractiveShadingPolicy * rendererParams.ambientColor * rendererParams.ambientIntensity; contrib += color * nextRay.contributionWeight; } else { - if (vec3 hdri; getBackgroundLight(frameData, bounceRay.dir, hdri)) { - contrib += vec3(hdri) * nextRay.contributionWeight; - } + vec3 hdri; + if (getBackgroundLight(frameData, bounceRay.dir, hdri)) + contrib += hdri * nextRay.contributionWeight; } } - float opacity = evaluateOpacity(shadingState); - return vec4(contrib, opacity); + return vec4(contrib, evaluateOpacity(shadingState)); } }; diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index e65d650ee..a75893edf 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -377,8 +377,6 @@ VISRTX_GLOBAL void __raygen__() } } - accumulateValue(sample.opacity, materialOpacity, sample.opacity); - auto nextRay = materialNextRay(shadingState, ray, ss.rs); sampleContribution *= nextRay.contributionWeight; From 52fea87a6ed726825267a4e2c1b50107dad05438 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:27:38 -0400 Subject: [PATCH 13/16] rtx: fix HDRI NEE pdf normalization Fix 2 importance sampling bugs: - Per pixel area was wrong when computing the pdf weight - Pole bias was accounted twice, in the precomputed luminance and in sample light. 
--- devices/rtx/device/gpu/sampleLight.h | 61 ++++++++++++++---------- devices/rtx/device/light/sampling/CDF.cu | 42 ++++++++-------- 2 files changed, 58 insertions(+), 45 deletions(-) diff --git a/devices/rtx/device/gpu/sampleLight.h b/devices/rtx/device/gpu/sampleLight.h index e54e3bf03..14bbe64cf 100644 --- a/devices/rtx/device/gpu/sampleLight.h +++ b/devices/rtx/device/gpu/sampleLight.h @@ -62,10 +62,10 @@ namespace visrtx { // Light sampling result containing direction, distance, radiance and PDF struct LightSample { - vec3 radiance; // Emitted radiance in direction of hit point (W⋅sr⁻¹⋅m⁻²) - vec3 dir; // Unit direction vector from hit point to light sample - float dist; // Distance from hit point to light sample - float pdf; // Probability density function value for this sample + vec3 radiance; // Emitted radiance in direction of hit point (W⋅sr⁻¹⋅m⁻²) + vec3 dir; // Unit direction vector from hit point to light sample + float dist; // Distance from hit point to light sample + float pdf; // Probability density function value for this sample }; namespace detail { @@ -74,8 +74,8 @@ VISRTX_DEVICE LightSample sampleDirectionalLight( const LightGPUData &ld, const mat4 &xfm) { LightSample ls; - // Transform light direction to world space and negate to get direction TO light - // (ld.distant.direction points FROM the light source) + // Transform light direction to world space and negate to get direction TO + // light (ld.distant.direction points FROM the light source) ls.dir = xfmVec(xfm, -ld.distant.direction); ls.dist = std::numeric_limits::infinity(); // For directional lights, irradiance is the amount of light per unit area @@ -135,16 +135,19 @@ VISRTX_DEVICE LightSample sampleSphereLight( // Area PDF = 1 / (4πr²), but we need solid angle PDF // Conversion: pdf_solid_angle = pdf_area * distance² / |cos θ| // For sphere: cos θ = dot(surface_normal, -light_direction) - // Surface normal at sampled point: direction from sphere center to sample point + // Surface 
normal at sampled point: direction from sphere center to sample + // point auto worldSphereCenter = xfmPoint(xfm, ld.sphere.position); auto surfaceNormal = normalize(worldSamplePos - worldSphereCenter); auto cosTheta = dot(surfaceNormal, -ls.dir); if (cosTheta > 0.0f) { // Note: For non-uniform scaling transforms, the area calculation would need - // to account for the transform's effect on surface area (determinant of jacobian) - // Currently assumes uniform scaling or no scaling of the light geometry - float areaPdf = 1.f / (4.f * float(M_PI) * ld.sphere.radius * ld.sphere.radius); + // to account for the transform's effect on surface area (determinant of + // jacobian) Currently assumes uniform scaling or no scaling of the light + // geometry + float areaPdf = + 1.f / (4.f * float(M_PI) * ld.sphere.radius * ld.sphere.radius); ls.pdf = areaPdf * pow2(ls.dist) / cosTheta; } else { // Back-facing surface element contributes no light @@ -179,9 +182,9 @@ VISRTX_DEVICE LightSample sampleRectLight( // Handle front/back face emission based on light configuration if (ld.rect.side.back) { if (ld.rect.side.front) - cosTheta = fabsf(cosTheta); // Both sides: always positive + cosTheta = fabsf(cosTheta); // Both sides: always positive else - cosTheta = -cosTheta; // Back only: flip to back face + cosTheta = -cosTheta; // Back only: flip to back face } // Front only: use cosTheta as-is (positive for front face) @@ -216,7 +219,8 @@ VISRTX_DEVICE LightSample sampleRingLight( // For uniform area sampling: r² = u₂(R² - r²) + r² where R=outer, r=inner auto outerRadius = ld.ring.radius; auto innerRadius = ld.ring.innerRadius; - auto r = sqrtf(u2 * (outerRadius * outerRadius - innerRadius * innerRadius) + innerRadius * innerRadius); + auto r = sqrtf(u2 * (outerRadius * outerRadius - innerRadius * innerRadius) + + innerRadius * innerRadius); // Create orthonormal basis with ring direction as normal auto direction = normalize(ld.ring.direction); @@ -246,7 +250,8 @@ VISRTX_DEVICE 
LightSample sampleRingLight( } else { // Falloff region: smooth interpolation using smoothstep function // smoothstep(t) = 3t² - 2t³ provides C¹ continuity - spot = (cosTheta - ld.ring.cosOuterAngle) / (ld.ring.cosInnerAngle - ld.ring.cosOuterAngle); + spot = (cosTheta - ld.ring.cosOuterAngle) + / (ld.ring.cosInnerAngle - ld.ring.cosOuterAngle); spot = spot * spot * (3.0f - 2.0f * spot); } @@ -258,7 +263,7 @@ VISRTX_DEVICE LightSample sampleRingLight( // Convert area PDF to solid angle PDF for proper Monte Carlo integration // Ring area = π(R² - r²), so area PDF = 1 / ring_area // Solid angle PDF = area_pdf * distance² / |cos θ| - float areaPdf = ld.ring.oneOverArea; // This is 1 / ring_area + float areaPdf = ld.ring.oneOverArea; // This is 1 / ring_area ls.pdf = areaPdf * pow2(ls.dist) / cosTheta; } else { ls.radiance = vec3(0.0f); @@ -290,14 +295,14 @@ VISRTX_DEVICE LightSample sampleSpotLight( // Apply spotlight cone attenuation with smooth falloff if (spot < ld.spot.cosOuterAngle) - spot = 0.f; // Outside cone: no illumination + spot = 0.f; // Outside cone: no illumination else if (spot > ld.spot.cosInnerAngle) - spot = 1.f; // Inside inner cone: full illumination + spot = 1.f; // Inside inner cone: full illumination else { // Falloff region: smooth interpolation using smoothstep spot = (spot - ld.spot.cosOuterAngle) / (ld.spot.cosInnerAngle - ld.spot.cosOuterAngle); - spot = spot * spot * (3.f - 2.f * spot); // smoothstep function + spot = spot * spot * (3.f - 2.f * spot); // smoothstep function } // Apply inverse square law with spotlight attenuation @@ -325,13 +330,15 @@ VISRTX_DEVICE LightSample sampleHDRILight( / glm::vec2(float(M_PI) * 2.0f, float(M_PI)); auto radiance = sampleHDRI(ld, uv); - // Calculate PDF using luminance (ITU-R BT.709 weights) and jacobian - // sin(θ) term accounts for the jacobian of spherical→rectangular mapping - auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * sinf(thetaPhi.x) * ld.hdri.pdfWeight; + // pdf_ω = (L/totalL) · 
pdfWeight; the equirectangular sinθ jacobian is + // already folded into the CDF (computeWeightedLuminance) and into + // pdfWeight's 2π²/(W·H) factor, so do not re-multiply by sinθ here. + auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * ld.hdri.pdfWeight; LightSample ls; ls.dir = xfmVec(xfm, dir); - ls.dist = std::numeric_limits<float>::infinity(); // Environment is at infinity + ls.dist = + std::numeric_limits<float>::infinity(); // Environment is at infinity ls.radiance = radiance * ld.hdri.scale; ls.pdf = pdf; @@ -342,7 +349,8 @@ VISRTX_DEVICE LightSample sampleHDRILight( const LightGPUData &ld, const mat4 &xfm, RandState &rs) { // Importance sampling using hierarchical (marginal/conditional) CDF approach - // First sample row (y) using marginal CDF, then column (x) using conditional CDF + // First sample row (y) using marginal CDF, then column (x) using conditional + // CDF auto y = inverseSampleCDF( ld.hdri.marginalCDF, ld.hdri.size.y, curand_uniform(&rs)); auto x = inverseSampleCDF(ld.hdri.conditionalCDF + y * ld.hdri.size.x, @@ -365,16 +373,17 @@ VISRTX_DEVICE LightSample sampleHDRILight( // uv.y ∈ [0,1] → θ ∈ [0,π], uv.x ∈ [0,1] → φ ∈ [0,2π] auto thetaPhi = float(M_PI) * glm::vec2(uv.y, 2.0f * (uv.x)); - // Calculate PDF using luminance and jacobian of spherical mapping + // pdf_ω = (L/totalL) · pdfWeight; the equirectangular sinθ jacobian is + // already folded into the CDF and pdfWeight, so do not re-multiply here. 
auto radiance = sampleHDRI(ld, uv); - auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * sinf(thetaPhi.x) * ld.hdri.pdfWeight; + auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * ld.hdri.pdfWeight; LightSample ls; // Transform spherical direction to world space // ld.hdri.xfm is orthogonal, so we can use right-hand multiplication // instead of explicitly transposing/inverting the matrix ls.dir = xfmVec(xfm, sphericalCoordsToDirection(thetaPhi) * ld.hdri.xfm); - ls.dist = 1e20f; // Environment is effectively at infinity + ls.dist = 1e20f; // Environment is effectively at infinity ls.radiance = radiance * ld.hdri.scale; ls.pdf = pdf; diff --git a/devices/rtx/device/light/sampling/CDF.cu b/devices/rtx/device/light/sampling/CDF.cu index dddde35a3..39f7c63cc 100644 --- a/devices/rtx/device/light/sampling/CDF.cu +++ b/devices/rtx/device/light/sampling/CDF.cu @@ -134,10 +134,10 @@ void normalizeMarginalCDF(float *marginalCdf, int height) using thrust::device_pointer_cast; auto cdf = device_pointer_cast(marginalCdf); - thrust::transform(cdf, - cdf + height, - cdf, - [total = cdf[height - 1]] __device__(float x) { return x / total; }); + thrust::transform( + cdf, cdf + height, cdf, [total = cdf[height - 1]] __device__(float x) { + return x / total; + }); } void normalizeConditionalCDFs(float *d_conditional_cdf, int width, int height) @@ -146,10 +146,10 @@ void normalizeConditionalCDFs(float *d_conditional_cdf, int width, int height) for (int y = 0; y < height; ++y) { auto cdfRow = device_pointer_cast(d_conditional_cdf + y * width); - thrust::transform( - cdfRow, cdfRow + width, cdfRow, [total = cdfRow[width - 1]] __device__(float x) { - return x / total; - }); + thrust::transform(cdfRow, + cdfRow + width, + cdfRow, + [total = cdfRow[width - 1]] __device__(float x) { return x / total; }); } } @@ -172,22 +172,23 @@ float generateCDFTables(const float *luminanceImage, computeRowSums(luminanceImage, rowSums.ptrAs(), width, height); computeMarginalCDF( 
rowSums.ptrAs(), marginalCdf->ptrAs(), height); - computeConditionalCDFs(luminanceImage, - conditionalCdf->ptrAs(), - width, - height); + computeConditionalCDFs( + luminanceImage, conditionalCdf->ptrAs(), width, height); // Compute pdfWeight // Not the best, but accumulation operations of cdfs accumulate error. // Lets recompute the total luminance from the luminance array // to avoid this. - auto totalLuminance = reduce( - device_pointer_cast(luminanceImage), - device_pointer_cast(luminanceImage) + width * height); + auto totalLuminance = reduce(device_pointer_cast(luminanceImage), + device_pointer_cast(luminanceImage) + width * height); - float angularArea = 4.0f * float(M_PI) / (width * height); - float weight = 1.0f / (totalLuminance * angularArea); + // Equirectangular Jacobian |dω/d(u,v)| = 2π²·sinθ; the sinθ weighting is + // already folded into the CDF luminance, so the per-pixel area factor is + // 2π²/(W·H) and pdf_ω = (L/totalL) · (W·H)/(2π²). + const float equirectJacobian = + 2.0f * float(M_PI) * float(M_PI) / (width * height); + float weight = 1.0f / (totalLuminance * equirectJacobian); // Normalize both tables normalizeMarginalCDF(marginalCdf->ptrAs(), height); @@ -211,8 +212,11 @@ float generateCDFTables(const glm::vec3 *rgbImage, computeWeightedLuminance(rgbImage, luminance.ptrAs(), width, height); - return generateCDFTables( - luminance.ptrAs(), width, height, marginalCdf, conditionalCdf); + return generateCDFTables(luminance.ptrAs(), + width, + height, + marginalCdf, + conditionalCdf); } } // namespace visrtx From da9fc8df86e04f64c60bdbb06d341e69395746db Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:35:43 -0400 Subject: [PATCH 14/16] rtx: put uniform light-pick factor into pdf instead of radiance Move the light probability weight into pdf instead of returned radiance so MIS consumers see the correct joint density. 
--- devices/rtx/device/renderer/Quality_ptx.cu | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index a75893edf..eab5868da 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -145,23 +145,25 @@ VISRTX_DEVICE LightSample sampleLights(ScreenSample &ss, glm::min(size_t((1.0f - curand_uniform(&ss.rs)) * float(numLights)), numLights - 1); - const float radianceWeight = float(numLights); + // Uniform light pick: P(light) = 1/numLights. Fold that into the returned + // pdf rather than into radiance so MIS weights see the full joint pdf + // P(dir, light) = P(dir | light) * (1/numLights). + const float lightPickPdf = 1.0f / float(numLights); // last index is reserved for ambient light if it exists if (selectedIdx == world.numLightInstances) { const auto &rendererParams = frameData.renderer; return LightSample{ - radianceWeight * rendererParams.ambientColor - * rendererParams.ambientIntensity, + rendererParams.ambientColor * rendererParams.ambientIntensity, sampleHemisphere(ss.rs, normal), std::numeric_limits<float>::max(), - 1.0f / (2.0f * float(M_PI)), + lightPickPdf / (2.0f * float(M_PI)), }; } else { const auto &lightInstance = world.lightInstances[selectedIdx]; auto ls = sampleLight(ss, origin, lightInstance.lightIndex, lightInstance.xfm); - ls.radiance *= radianceWeight; + ls.pdf *= lightPickPdf; return ls; } } From 66eca6226ea8f1088ed360a8783761003b23b12f Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:44:44 -0400 Subject: [PATCH 15/16] =?UTF-8?q?rtx:=20PhysicallyBased=20=E2=80=94=20glTF?= =?UTF-8?q?=202.0=20KHR=20extension=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire up the glTF 2.0 KHR material extensions on the PhysicallyBased material — specular, clearcoat, sheen, 
iridescence, occlusion, and volume (thickness, attenuation distance and color) — with the usual constant / attribute / sampler routing on host and GPU. Rework the shader so direct lighting and next-ray sampling use a proper microfacet model and compose the new lobes (base + clearcoat + sheen, iridescent Fresnel). Drop the cone-around-mirror reflection sample, and move the hand-tuned 0.85 transmission factor out of the material shaders' evaluateTransmission into the renderer-side evaluateOpacity. Add a NextRay.flags field to distinguish opacity pass-through from a real bounce; Quality and Interactive use it. --- devices/rtx/device/gpu/gpu_objects.h | 17 + devices/rtx/device/gpu/gpu_util.h | 4 +- .../rtx/device/gpu/renderer/raygen_helpers.h | 4 +- devices/rtx/device/gpu/shadingState.h | 25 +- devices/rtx/device/material/PBR.cpp | 155 ++++- devices/rtx/device/material/PBR.h | 52 +- .../device/material/shaders/MDLShader_ptx.cu | 18 +- .../material/shaders/MatteShader_ptx.cu | 2 +- .../shaders/PhysicallyBasedShader_ptx.cu | 560 ++++++++++++++---- .../rtx/device/renderer/Interactive_ptx.cu | 10 +- devices/rtx/device/renderer/Quality_ptx.cu | 11 +- 11 files changed, 696 insertions(+), 162 deletions(-) diff --git a/devices/rtx/device/gpu/gpu_objects.h b/devices/rtx/device/gpu/gpu_objects.h index ee0d2a6b5..f1197856a 100644 --- a/devices/rtx/device/gpu/gpu_objects.h +++ b/devices/rtx/device/gpu/gpu_objects.h @@ -363,6 +363,23 @@ struct MaterialGPUData MaterialParameter transmission; float ior; + + // KHR_materials_* extensions + DeviceObjectIndex occlusionSampler; + MaterialParameter specular; + MaterialParameter specularColor; + uint32_t useSpecular; + MaterialParameter clearcoat; + MaterialParameter clearcoatRoughness; + DeviceObjectIndex clearcoatNormalSampler; + MaterialParameter thickness; + float attenuationDistance; + vec3 attenuationColor; + MaterialParameter sheenColor; + MaterialParameter sheenRoughness; + MaterialParameter iridescence; + float iridescenceIor; + MaterialParameter iridescenceThickness; }; struct MDL diff --git 
a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index ee269b9f8..9e17c44ba 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ b/devices/rtx/device/gpu/gpu_util.h @@ -34,8 +34,7 @@ #include "cameraCreateRay.h" #include "gpu/gpu_debug.h" #include "gpu_objects.h" -#include "gpu_tonemap.h" - +#include "shadingState.h" // optix #include // std @@ -46,6 +45,7 @@ #include // cuda #include +#include "gpu_tonemap.h" #ifndef __CUDACC__ #error "gpu_util.h can only be included in device code" diff --git a/devices/rtx/device/gpu/renderer/raygen_helpers.h b/devices/rtx/device/gpu/renderer/raygen_helpers.h index a597c7c72..5f65d6405 100644 --- a/devices/rtx/device/gpu/renderer/raygen_helpers.h +++ b/devices/rtx/device/gpu/renderer/raygen_helpers.h @@ -65,7 +65,9 @@ VISRTX_DEVICE float volumeAttenuation(ScreenSample &ss, const Ray &r) VISRTX_DEVICE float evaluateOpacity(const MaterialShadingState &shadingState) { return materialEvaluateOpacity(shadingState) - * (1.0f - glm::luminosity(materialEvaluateTransmission(shadingState))); + * (1.0f + - glm::luminosity(materialEvaluateTransmission(shadingState)) + * 0.85f); } // Templated rendering loop diff --git a/devices/rtx/device/gpu/shadingState.h b/devices/rtx/device/gpu/shadingState.h index 467056b34..1831d5436 100644 --- a/devices/rtx/device/gpu/shadingState.h +++ b/devices/rtx/device/gpu/shadingState.h @@ -49,11 +49,18 @@ namespace visrtx { -// Describes the next ray to be traced, as a result of the EvaluateNextRay call +enum NextRayFlags : uint32_t +{ + NEXT_RAY_NONE = 0u, + NEXT_RAY_CONTINUES_THROUGH_SURFACE = 1u << 0 +}; + +// Describes the next ray to be traced, as a result of the EvaluateNextRay call. 
struct NextRay { vec3 direction; vec3 contributionWeight; + uint32_t flags{NEXT_RAY_NONE}; }; // Matte @@ -75,6 +82,22 @@ struct PhysicallyBasedShadingState float transmission; float ior; vec3 emission; + + float occlusion; + float specular; + vec3 specularColor; + uint32_t useSpecular; + float clearcoat; + float clearcoatRoughness; + vec3 clearcoatNormal; + float thickness; + float attenuationDistance; + vec3 attenuationColor; + vec3 sheenColor; + float sheenRoughness; + float iridescence; + float iridescenceIor; + float iridescenceThickness; }; #ifdef USE_MDL diff --git a/devices/rtx/device/material/PBR.cpp b/devices/rtx/device/material/PBR.cpp index ca938f6f0..eecae93ce 100644 --- a/devices/rtx/device/material/PBR.cpp +++ b/devices/rtx/device/material/PBR.cpp @@ -43,7 +43,18 @@ PBR::PBR(DeviceGlobalState *d) m_roughnessSampler(this), m_normalSampler(this), m_emissiveSampler(this), - m_transmissionSampler(this) + m_occlusionSampler(this), + m_specularSampler(this), + m_specularColorSampler(this), + m_clearcoatSampler(this), + m_clearcoatRoughnessSampler(this), + m_clearcoatNormalSampler(this), + m_transmissionSampler(this), + m_thicknessSampler(this), + m_sheenColorSampler(this), + m_sheenRoughnessSampler(this), + m_iridescenceSampler(this), + m_iridescenceThicknessSampler(this) {} void PBR::commitParameters() @@ -52,7 +63,7 @@ void PBR::commitParameters() m_opacitySampler = getParamObject("opacity"); m_opacityAttribute = getParamString("opacity", ""); - m_color = vec4(vec3(0.8f), 1.f); + m_color = vec4(1.f); getParam("baseColor", ANARI_FLOAT32_VEC4, &m_color); getParam("baseColor", ANARI_FLOAT32_VEC3, &m_color); m_colorSampler = getParamObject("baseColor"); @@ -68,18 +79,70 @@ void PBR::commitParameters() m_normalSampler = getParamObject("normal"); - m_emissive = vec4(0.f, 0.f, 0.f, 0.f); + m_emissive = vec4(0.f); getParam("emissive", ANARI_FLOAT32_VEC4, &m_emissive); getParam("emissive", ANARI_FLOAT32_VEC3, &m_emissive); m_emissiveSampler = 
getParamObject("emissive"); m_emissiveAttribute = getParamString("emissive", ""); + m_occlusionSampler = getParamObject("occlusion"); + + m_useSpecular = getParamDirect("specular").valid() + || getParamDirect("specularColor").valid(); + m_specular = getParam("specular", m_useSpecular ? 1.f : 0.f); + m_specularSampler = getParamObject("specular"); + m_specularAttribute = getParamString("specular", ""); + + m_specularColor = vec3(1.f); + getParam("specularColor", ANARI_FLOAT32_VEC3, &m_specularColor); + m_specularColorSampler = getParamObject("specularColor"); + m_specularColorAttribute = getParamString("specularColor", ""); + + m_clearcoat = getParam("clearcoat", 0.f); + m_clearcoatSampler = getParamObject("clearcoat"); + m_clearcoatAttribute = getParamString("clearcoat", ""); + + m_clearcoatRoughness = getParam("clearcoatRoughness", 0.f); + m_clearcoatRoughnessSampler = getParamObject("clearcoatRoughness"); + m_clearcoatRoughnessAttribute = getParamString("clearcoatRoughness", ""); + + m_clearcoatNormalSampler = getParamObject("clearcoatNormal"); + m_transmission = getParam("transmission", 0.f); m_transmissionSampler = getParamObject("transmission"); m_transmissionAttribute = getParamString("transmission", ""); m_ior = getParam("ior", 1.5f); + m_thickness = getParam("thickness", 0.f); + m_thicknessSampler = getParamObject("thickness"); + m_thicknessAttribute = getParamString("thickness", ""); + + m_attenuationDistance = getParam( + "attenuationDistance", std::numeric_limits::infinity()); + m_attenuationColor = vec3(1.f); + getParam("attenuationColor", ANARI_FLOAT32_VEC3, &m_attenuationColor); + + m_sheenColor = vec3(0.f); + getParam("sheenColor", ANARI_FLOAT32_VEC3, &m_sheenColor); + m_sheenColorSampler = getParamObject("sheenColor"); + m_sheenColorAttribute = getParamString("sheenColor", ""); + + m_sheenRoughness = getParam("sheenRoughness", 0.f); + m_sheenRoughnessSampler = getParamObject("sheenRoughness"); + m_sheenRoughnessAttribute = 
getParamString("sheenRoughness", ""); + + m_iridescence = getParam("iridescence", 0.f); + m_iridescenceSampler = getParamObject("iridescence"); + m_iridescenceAttribute = getParamString("iridescence", ""); + + m_iridescenceIor = getParam("iridescenceIor", 1.3f); + + m_iridescenceThickness = getParam("iridescenceThickness", 0.f); + m_iridescenceThicknessSampler = + getParamObject("iridescenceThickness"); + m_iridescenceThicknessAttribute = getParamString("iridescenceThickness", ""); + m_cutoff = getParam("alphaCutoff", 0.5f); m_mode = alphaModeFromString(getParamString("alphaMode", "opaque")); } @@ -87,39 +150,81 @@ void PBR::commitParameters() MaterialGPUData PBR::gpuData() const { MaterialGPUData retval; + auto &pb = retval.materialData.physicallyBased; retval.callableBaseIndex = static_cast(SbtCallableEntryPoints::PBR); - populateMaterialParameter(retval.materialData.physicallyBased.baseColor, - m_color, - m_colorSampler.get(), - m_colorAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.opacity, - m_opacity, - m_opacitySampler.get(), - m_opacityAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.metallic, - m_metallic, - m_metallicSampler.get(), - m_metallicAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.roughness, + populateMaterialParameter( + pb.baseColor, m_color, m_colorSampler.get(), m_colorAttribute); + populateMaterialParameter( + pb.opacity, m_opacity, m_opacitySampler.get(), m_opacityAttribute); + populateMaterialParameter( + pb.metallic, m_metallic, m_metallicSampler.get(), m_metallicAttribute); + populateMaterialParameter(pb.roughness, m_roughness, m_roughnessSampler.get(), m_roughnessAttribute); - retval.materialData.physicallyBased.normalSampler = + pb.normalSampler = m_normalSampler ? 
m_normalSampler->index() : ~DeviceObjectIndex{0}; - populateMaterialParameter(retval.materialData.physicallyBased.emissive, - m_emissive, - m_emissiveSampler.get(), - m_emissiveAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.transmission, + populateMaterialParameter( + pb.emissive, m_emissive, m_emissiveSampler.get(), m_emissiveAttribute); + populateMaterialParameter(pb.transmission, m_transmission, m_transmissionSampler.get(), m_transmissionAttribute); - retval.materialData.physicallyBased.ior = m_ior; - retval.materialData.physicallyBased.cutoff = m_cutoff; - retval.materialData.physicallyBased.alphaMode = m_mode; + pb.ior = m_ior; + pb.cutoff = m_cutoff; + pb.alphaMode = m_mode; + + pb.occlusionSampler = + m_occlusionSampler ? m_occlusionSampler->index() : ~DeviceObjectIndex{0}; + + populateMaterialParameter( + pb.specular, m_specular, m_specularSampler.get(), m_specularAttribute); + populateMaterialParameter(pb.specularColor, + vec4(m_specularColor, 1.f), + m_specularColorSampler.get(), + m_specularColorAttribute); + pb.useSpecular = m_useSpecular ? 1u : 0u; + + populateMaterialParameter(pb.clearcoat, + m_clearcoat, + m_clearcoatSampler.get(), + m_clearcoatAttribute); + populateMaterialParameter(pb.clearcoatRoughness, + m_clearcoatRoughness, + m_clearcoatRoughnessSampler.get(), + m_clearcoatRoughnessAttribute); + pb.clearcoatNormalSampler = m_clearcoatNormalSampler + ? 
m_clearcoatNormalSampler->index() + : ~DeviceObjectIndex{0}; + + populateMaterialParameter(pb.thickness, + m_thickness, + m_thicknessSampler.get(), + m_thicknessAttribute); + pb.attenuationDistance = m_attenuationDistance; + pb.attenuationColor = m_attenuationColor; + + populateMaterialParameter(pb.sheenColor, + vec4(m_sheenColor, 0.f), + m_sheenColorSampler.get(), + m_sheenColorAttribute); + populateMaterialParameter(pb.sheenRoughness, + m_sheenRoughness, + m_sheenRoughnessSampler.get(), + m_sheenRoughnessAttribute); + + populateMaterialParameter(pb.iridescence, + m_iridescence, + m_iridescenceSampler.get(), + m_iridescenceAttribute); + pb.iridescenceIor = m_iridescenceIor; + populateMaterialParameter(pb.iridescenceThickness, + m_iridescenceThickness, + m_iridescenceThicknessSampler.get(), + m_iridescenceThicknessAttribute); return retval; } diff --git a/devices/rtx/device/material/PBR.h b/devices/rtx/device/material/PBR.h index 71b12a3a5..e6ce2cc80 100644 --- a/devices/rtx/device/material/PBR.h +++ b/devices/rtx/device/material/PBR.h @@ -31,8 +31,10 @@ #pragma once -#include "sampler/Sampler.h" #include "Material.h" +#include "sampler/Sampler.h" + +#include namespace visrtx { @@ -48,7 +50,7 @@ struct PBR : public Material float m_cutoff{0.5f}; AlphaMode m_mode{AlphaMode::OPAQUE}; - vec4 m_color{vec3(0.8f), 1.f}; + vec4 m_color{1.f, 1.f, 1.f, 1.f}; helium::ChangeObserverPtr m_colorSampler; std::string m_colorAttribute; @@ -70,11 +72,57 @@ struct PBR : public Material helium::ChangeObserverPtr m_emissiveSampler; std::string m_emissiveAttribute; + helium::ChangeObserverPtr m_occlusionSampler; + + float m_specular{0.f}; + helium::ChangeObserverPtr m_specularSampler; + std::string m_specularAttribute; + bool m_useSpecular{false}; + + vec3 m_specularColor{1.f}; + helium::ChangeObserverPtr m_specularColorSampler; + std::string m_specularColorAttribute; + + float m_clearcoat{0.f}; + helium::ChangeObserverPtr m_clearcoatSampler; + std::string m_clearcoatAttribute; + + 
float m_clearcoatRoughness{0.f}; + helium::ChangeObserverPtr m_clearcoatRoughnessSampler; + std::string m_clearcoatRoughnessAttribute; + + helium::ChangeObserverPtr m_clearcoatNormalSampler; + float m_transmission{0.f}; helium::ChangeObserverPtr m_transmissionSampler; std::string m_transmissionAttribute; float m_ior{1.5f}; + + float m_thickness{0.f}; + helium::ChangeObserverPtr m_thicknessSampler; + std::string m_thicknessAttribute; + + float m_attenuationDistance{std::numeric_limits::infinity()}; + vec3 m_attenuationColor{1.f}; + + vec3 m_sheenColor{0.f}; + helium::ChangeObserverPtr m_sheenColorSampler; + std::string m_sheenColorAttribute; + + float m_sheenRoughness{0.f}; + helium::ChangeObserverPtr m_sheenRoughnessSampler; + std::string m_sheenRoughnessAttribute; + + float m_iridescence{0.f}; + helium::ChangeObserverPtr m_iridescenceSampler; + std::string m_iridescenceAttribute; + + float m_iridescenceIor{1.3f}; + + float m_iridescenceThickness{0.f}; + helium::ChangeObserverPtr m_iridescenceThicknessSampler; + std::string m_iridescenceThicknessAttribute; }; } // namespace visrtx diff --git a/devices/rtx/device/material/shaders/MDLShader_ptx.cu b/devices/rtx/device/material/shaders/MDLShader_ptx.cu index cef1e7083..795b4f1f2 100644 --- a/devices/rtx/device/material/shaders/MDLShader_ptx.cu +++ b/devices/rtx/device/material/shaders/MDLShader_ptx.cu @@ -204,7 +204,7 @@ NextRay __direct_callable__nextRay( if (curand_uniform(rs) > mdlOpacity(&shadingState->state, &shadingState->resData, shadingState->argBlock)) { - return NextRay{ray->dir, vec3(1.0f)}; + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; } // Sample @@ -227,10 +227,17 @@ NextRay __direct_callable__nextRay( &shadingState->resData, shadingState->argBlock); - return NextRay{vec3(sample_data.k2.x, sample_data.k2.y, sample_data.k2.z), + const vec3 direction(sample_data.k2.x, sample_data.k2.y, sample_data.k2.z); + const vec3 N = normalize(make_vec3(shadingState->state.normal)); + 
const uint32_t flags = dot(ray->dir, N) * dot(direction, N) > 0.0f + ? NEXT_RAY_CONTINUES_THROUGH_SURFACE + : NEXT_RAY_NONE; + + return NextRay{direction, vec3(sample_data.bsdf_over_pdf.x, sample_data.bsdf_over_pdf.y, - sample_data.bsdf_over_pdf.z)}; + sample_data.bsdf_over_pdf.z), + flags}; } // Signature must match the call inside shaderMDLSurface in MDLShader.cuh. @@ -273,12 +280,11 @@ vec3 __direct_callable__evaluateTransmission( const MDLShadingState *shadingState) { return mdlTransmission( - &shadingState->state, &shadingState->resData, shadingState->argBlock) - * 0.85f; + &shadingState->state, &shadingState->resData, shadingState->argBlock); } VISRTX_CALLABLE vec3 __direct_callable__evaluateNormal(const MDLShadingState *shadingState) { return make_vec3(shadingState->state.normal); -} \ No newline at end of file +} diff --git a/devices/rtx/device/material/shaders/MatteShader_ptx.cu b/devices/rtx/device/material/shaders/MatteShader_ptx.cu index 7766c0b3c..06429e5fa 100644 --- a/devices/rtx/device/material/shaders/MatteShader_ptx.cu +++ b/devices/rtx/device/material/shaders/MatteShader_ptx.cu @@ -58,7 +58,7 @@ VISRTX_CALLABLE NextRay __direct_callable__nextRay( return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; } - return NextRay{vec3(0.0f, 0.0f, 0.0f), vec3(0.0f, 0.0f, 0.0f)}; + return NextRay{vec3(0.0f), vec3(0.0f)}; } VISRTX_CALLABLE diff --git a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu index 80ec7da3b..5e710b729 100644 --- a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu +++ b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu @@ -38,174 +38,512 @@ using namespace visrtx; +// Clearcoat is fixed to IOR 1.5 per glTF KHR_materials_clearcoat (F0 = 0.04). 
+constexpr float CLEARCOAT_F0 = 0.04f; + +//----------------------------------------------------------------------------- +// Helpers +//----------------------------------------------------------------------------- + +VISRTX_DEVICE vec3 applyNormalMap( + const vec3 &tangentSpaceNormal, const SurfaceHit &hit, const vec3 &N) +{ + vec3 T = normalize(hit.tU); + vec3 B = normalize(hit.tV); + // Gram-Schmidt to build an orthonormal frame tied to N. + T = normalize(T - dot(T, N) * N); + B = normalize(B - dot(B, N) * N - dot(B, T) * T); + return normalize(T * tangentSpaceNormal.x + B * tangentSpaceNormal.y + + N * tangentSpaceNormal.z); +} + +VISRTX_DEVICE vec3 sampleNormalMap(const FrameGPUData &fd, + DeviceObjectIndex samplerIdx, + const SurfaceHit &hit, + const vec3 &fallback) +{ + if (samplerIdx == ~visrtx::DeviceObjectIndex{0}) + return fallback; + const vec3 ts = normalize(evaluateSampler(fd, samplerIdx, hit) * 2.0f - 1.0f); + return applyNormalMap(ts, hit, hit.Ns); +} + +VISRTX_DEVICE float luminance(const vec3 &c) +{ + return dot(c, vec3(0.2126f, 0.7152f, 0.0722f)); +} + +VISRTX_DEVICE vec3 computeVolumeTransmission( + const PhysicallyBasedShadingState *state) +{ + if (!(state->thickness > 0.0f && state->attenuationDistance > 0.0f + && isfinite(state->attenuationDistance))) + return vec3(1.0f); + + const float k = state->thickness / state->attenuationDistance; + return vec3(powf(fmaxf(state->attenuationColor.x, 1e-6f), k), + powf(fmaxf(state->attenuationColor.y, 1e-6f), k), + powf(fmaxf(state->attenuationColor.z, 1e-6f), k)); +} + +VISRTX_DEVICE vec3 computeTransmissionFilter( + const PhysicallyBasedShadingState *state) +{ + const float transmission = + fmaxf(0.0f, (1.0f - state->metallic) * state->transmission); + return state->baseColor * transmission * computeVolumeTransmission(state); +} + +// Smith Lambda for GGX (common subterm of G1 / G2). 
+VISRTX_DEVICE float smithLambdaGGX(float NdotX, float alpha2) +{ + const float NdotX2 = NdotX * NdotX; + const float safe = fmaxf(NdotX2, 1e-8f); + return 0.5f + * (-1.0f + sqrtf(fmaxf(0.0f, 1.0f + alpha2 * (1.0f - safe) / safe))); +} + +VISRTX_DEVICE float smithG2GGX(float NdotV, float NdotL, float alpha2) +{ + return 1.0f + / (1.0f + smithLambdaGGX(NdotV, alpha2) + smithLambdaGGX(NdotL, alpha2)); +} + +VISRTX_DEVICE float smithG1GGX(float NdotV, float alpha2) +{ + return 1.0f / (1.0f + smithLambdaGGX(NdotV, alpha2)); +} + +VISRTX_DEVICE float ggxD(float NdotH, float alpha2) +{ + const float denom = NdotH * NdotH * (alpha2 - 1.0f) + 1.0f; + return alpha2 / (float(M_PI) * denom * denom); +} + +// Heitz 2018 (https://jcgt.org/published/0007/04/01/) visible-normal sampling +// for GGX. Ve is the view direction in local tangent space (+z = normal). +VISRTX_DEVICE vec3 sampleGGXVNDF( + const vec3 &Ve, float alpha, float u1, float u2) +{ + const vec3 Vh = normalize(vec3(alpha * Ve.x, alpha * Ve.y, Ve.z)); + const float lensq = Vh.x * Vh.x + Vh.y * Vh.y; + const vec3 T1 = lensq > 0.0f ? vec3(-Vh.y, Vh.x, 0.0f) * (1.0f / sqrtf(lensq)) + : vec3(1.0f, 0.0f, 0.0f); + const vec3 T2 = glm::cross(Vh, T1); + const float r = sqrtf(u1); + const float phi = 2.0f * float(M_PI) * u2; + const float t1 = r * cosf(phi); + float t2 = r * sinf(phi); + const float s = 0.5f * (1.0f + Vh.z); + t2 = (1.0f - s) * sqrtf(fmaxf(0.0f, 1.0f - t1 * t1)) + s * t2; + const vec3 Nh = + t1 * T1 + t2 * T2 + sqrtf(fmaxf(0.0f, 1.0f - t1 * t1 - t2 * t2)) * Vh; + return normalize(vec3(alpha * Nh.x, alpha * Nh.y, fmaxf(0.0f, Nh.z))); +} + +// Charlie distribution (Estevez-Kulla 2017) for sheen. 
+VISRTX_DEVICE float charlieD(float NdotH, float alpha) +{ + const float invAlpha = 1.0f / fmaxf(alpha, 1e-4f); + const float sin2 = fmaxf(0.0f, 1.0f - NdotH * NdotH); + return (2.0f + invAlpha) * powf(sin2, 0.5f * invAlpha) / (2.0f * float(M_PI)); +} + +// Ashikhmin visibility term (Neubelt-Pettineo variant) used with Charlie D. +VISRTX_DEVICE float charlieV(float NdotV, float NdotL) +{ + return 1.0f / (4.0f * (NdotV + NdotL - NdotV * NdotL) + 1e-6f); +} + +// glTF KHR_materials_iridescence thin-film Fresnel (port of the reference +// implementation at github.com/KhronosGroup/glTF-Sample-Renderer). Returns a +// per-channel Fresnel reflectance for a thin film of thickness T sitting on a +// base with Schlick F0. See the spec's Appendix B for the math. +VISRTX_DEVICE vec3 fresnel0ToIor(vec3 F0) +{ + const vec3 s = sqrt(glm::clamp(F0, vec3(0.0f), vec3(0.9999f))); + return (vec3(1.0f) + s) / (vec3(1.0f) - s); +} + +VISRTX_DEVICE vec3 iorToFresnel0(vec3 transmittedIor, float incidentIor) +{ + const vec3 t = (transmittedIor - vec3(incidentIor)) + / (transmittedIor + vec3(incidentIor)); + return t * t; +} + +VISRTX_DEVICE float iorToFresnel0(float transmittedIor, float incidentIor) +{ + const float t = + (transmittedIor - incidentIor) / (transmittedIor + incidentIor); + return t * t; +} + +VISRTX_DEVICE vec3 evalSensitivity(float opd, vec3 shift) +{ + // Approximate spectral sensitivity of the standard observer as three + // Gaussians (Belcour & Barla 2017, simplified) so the result stays in RGB. 
+ const float phase = 2.0f * float(M_PI) * opd * 1e-9f; + const vec3 val = vec3(5.4856e-13f, 4.4201e-13f, 5.2481e-13f); + const vec3 pos = vec3(1.6810e+06f, 1.7953e+06f, 2.2084e+06f); + const vec3 var = vec3(4.3278e+09f, 9.3046e+09f, 6.6121e+09f); + + vec3 xyz = val * sqrt(2.0f * float(M_PI) * var) * cos(pos * phase + shift) + * exp(-var * phase * phase); + xyz.x += 9.7470e-14f * sqrtf(2.0f * float(M_PI) * 4.5282e+09f) + * cosf(2.2399e+06f * phase + shift.x) + * expf(-4.5282e+09f * phase * phase); + xyz /= 1.0685e-7f; + + // sRGB conversion (D65). + return vec3(3.2404542f * xyz.x - 1.5371385f * xyz.y - 0.4985314f * xyz.z, + -0.9692660f * xyz.x + 1.8760108f * xyz.y + 0.0415560f * xyz.z, + 0.0556434f * xyz.x - 0.2040259f * xyz.y + 1.0572252f * xyz.z); +} + +VISRTX_DEVICE vec3 evalIridescence(float outsideIor, + float iridescenceIor, + float cosTheta1, + float thickness, + vec3 baseF0) +{ + // Keep the thin-film IOR strictly above the outside IOR by clamping it to + // at least outsideIor + 1e-4; every term below uses the clamped value. + const float iridescenceIorSafe = fmaxf(iridescenceIor, outsideIor + 1e-4f); + + // Snell's law: squared sine of the refracted angle inside the thin film. + const float sinTheta2Sq = + pow2(outsideIor / iridescenceIorSafe) * (1.0f - cosTheta1 * cosTheta1); + const float cosTheta2Sq = 1.0f - sinTheta2Sq; + if (cosTheta2Sq < 0.0f) + return vec3(1.0f); // Total internal reflection. + const float cosTheta2 = sqrtf(cosTheta2Sq); + + // First interface: Fresnel between outside and thin film. + const float R0_12 = iorToFresnel0(iridescenceIorSafe, outsideIor); + const float R12 = R0_12 + (1.0f - R0_12) * pow5(1.0f - cosTheta1); + const float T121 = 1.0f - R12; + const float phi12 = iridescenceIorSafe < outsideIor ? float(M_PI) : 0.0f; + const float phi21 = float(M_PI) - phi12; + + // Second interface: film to base. 
+ const vec3 baseIor = + fresnel0ToIor(glm::clamp(baseF0, vec3(0.f), vec3(0.9999f))); + const vec3 R1 = iorToFresnel0(baseIor, iridescenceIorSafe); + const vec3 R23 = R1 + (vec3(1.0f) - R1) * pow5(1.0f - cosTheta2); + const vec3 phi23 = vec3(baseIor.x < iridescenceIorSafe ? float(M_PI) : 0.0f, + baseIor.y < iridescenceIorSafe ? float(M_PI) : 0.0f, + baseIor.z < iridescenceIorSafe ? float(M_PI) : 0.0f); + + const float opd = 2.0f * iridescenceIorSafe * thickness * cosTheta2; + const vec3 phi = vec3(phi21) + phi23; + + const vec3 R123 = glm::clamp(R12 * R23, vec3(1e-5f), vec3(0.9999f)); + const vec3 r123 = sqrt(R123); + const vec3 Rs = pow2(T121) * R23 / (vec3(1.0f) - R123); + + // DC term. + vec3 C0 = R12 + Rs; + vec3 I = C0; + + // Higher-order terms. + vec3 Cm = Rs - T121; + for (int m = 1; m <= 2; ++m) { + Cm *= r123; + const vec3 Sm = 2.0f * evalSensitivity(float(m) * opd, float(m) * phi); + I += Cm * Sm; + } + + return glm::max(I, vec3(0.0f)); +} + +//----------------------------------------------------------------------------- +// Initialize shading state from material parameters +//----------------------------------------------------------------------------- + VISRTX_CALLABLE void __direct_callable__init( PhysicallyBasedShadingState *shadingState, const FrameGPUData *fd, const SurfaceHit *hit, const MaterialGPUData::PhysicallyBased *md) { - vec4 color = getMaterialParameter(*fd, md->baseColor, *hit); - float opacity = getMaterialParameter(*fd, md->opacity, *hit).x; + const vec4 color = getMaterialParameter(*fd, md->baseColor, *hit); + const float opacity = getMaterialParameter(*fd, md->opacity, *hit).x; shadingState->baseColor = vec3(color); - vec3 normal = hit->Ns; - - if (md->normalSampler != ~visrtx::DeviceObjectIndex{0}) { - // Normal mapping computation. 
- auto normalMapValue = - normalize(evaluateSampler(*fd, md->normalSampler, *hit) * 2.0f - 1.0f); - vec3 T = normalize(hit->tU); - vec3 B = normalize(hit->tV); + const vec3 N = sampleNormalMap(*fd, md->normalSampler, *hit, hit->Ns); + shadingState->normal = N; - // Ensure orthogonality (Gram-Schmidt process) - T = normalize(T - dot(T, normal) * normal); - B = normalize(B - dot(B, normal) * normal - dot(B, T) * T); - - // Transform normal from tangent space to world space - normal = normalize(T * normalMapValue.x + B * normalMapValue.y - + normal * normalMapValue.z); - } - - shadingState->normal = normal; shadingState->opacity = adjustedMaterialOpacity(color.w * opacity, md->alphaMode, md->cutoff); shadingState->ior = hit->isFrontFace ? 1.0f / md->ior : md->ior; shadingState->metallic = getMaterialParameter(*fd, md->metallic, *hit).x; shadingState->roughness = getMaterialParameter(*fd, md->roughness, *hit).x; - - // Emission mapping shadingState->emission = vec3(getMaterialParameter(*fd, md->emissive, *hit)); - - // Transmission shadingState->transmission = getMaterialParameter(*fd, md->transmission, *hit).x; + + shadingState->occlusion = + md->occlusionSampler == ~visrtx::DeviceObjectIndex{0} + ? 
1.0f + : evaluateSampler(*fd, md->occlusionSampler, *hit).x; + + shadingState->specular = getMaterialParameter(*fd, md->specular, *hit).x; + shadingState->specularColor = + vec3(getMaterialParameter(*fd, md->specularColor, *hit)); + shadingState->useSpecular = md->useSpecular; + + shadingState->clearcoat = getMaterialParameter(*fd, md->clearcoat, *hit).x; + shadingState->clearcoatRoughness = + getMaterialParameter(*fd, md->clearcoatRoughness, *hit).x; + shadingState->clearcoatNormal = + sampleNormalMap(*fd, md->clearcoatNormalSampler, *hit, hit->Ns); + + shadingState->thickness = getMaterialParameter(*fd, md->thickness, *hit).x; + shadingState->attenuationDistance = md->attenuationDistance; + shadingState->attenuationColor = md->attenuationColor; + + shadingState->sheenColor = + vec3(getMaterialParameter(*fd, md->sheenColor, *hit)); + shadingState->sheenRoughness = + getMaterialParameter(*fd, md->sheenRoughness, *hit).x; + + shadingState->iridescence = + getMaterialParameter(*fd, md->iridescence, *hit).x; + shadingState->iridescenceIor = md->iridescenceIor; + shadingState->iridescenceThickness = + getMaterialParameter(*fd, md->iridescenceThickness, *hit).x; } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateTint( +//----------------------------------------------------------------------------- +// Simple accessors +//----------------------------------------------------------------------------- + +VISRTX_CALLABLE vec3 __direct_callable__evaluateTint( const PhysicallyBasedShadingState *shadingState) { return shadingState->baseColor; } -VISRTX_CALLABLE -float __direct_callable__evaluateOpacity( +VISRTX_CALLABLE float __direct_callable__evaluateOpacity( const PhysicallyBasedShadingState *shadingState) { return shadingState->opacity; } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateEmission( +VISRTX_CALLABLE vec3 __direct_callable__evaluateEmission( const PhysicallyBasedShadingState *shadingState, const vec3 *outgoingDir) { return shadingState->emission; } 
-VISRTX_CALLABLE -vec3 __direct_callable__evaluateTransmission( +VISRTX_CALLABLE vec3 __direct_callable__evaluateTransmission( const PhysicallyBasedShadingState *shadingState) { - return shadingState->baseColor * shadingState->transmission * 0.85f; + return computeTransmissionFilter(shadingState); } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateNormal( +VISRTX_CALLABLE vec3 __direct_callable__evaluateNormal( const PhysicallyBasedShadingState *shadingState) { return shadingState->normal; } -// Signature must match the call inside shaderPhysicallyBasedSurface in -// PhysicallyBasedShader.cuh. +//----------------------------------------------------------------------------- +// NEE shading: base (diffuse + GGX specular) + clearcoat + sheen +//----------------------------------------------------------------------------- + +VISRTX_DEVICE vec3 computeDielectricF0(const PhysicallyBasedShadingState *state) +{ + const float iorF0 = pow2((1.0f - state->ior) / (1.0f + state->ior)); + if (state->useSpecular == 0) + return vec3(iorF0); + return glm::min(vec3(iorF0) * state->specularColor, vec3(1.0f)) + * state->specular; +} + +VISRTX_DEVICE vec3 computeF0(const PhysicallyBasedShadingState *state) +{ + return glm::mix( + computeDielectricF0(state), state->baseColor, state->metallic); +} + +VISRTX_DEVICE vec3 computeF90(const PhysicallyBasedShadingState *state) +{ + const float dielectricF90 = state->useSpecular == 0 ? 
1.0f : state->specular; + return glm::mix(vec3(dielectricF90), vec3(1.0f), state->metallic); +} + +VISRTX_DEVICE vec3 schlickFresnel(vec3 F0, vec3 F90, float VdotH) +{ + return F0 + (F90 - F0) * pow5(1.0f - fabsf(VdotH)); +} + VISRTX_CALLABLE vec3 __direct_callable__shadeSurface( - const PhysicallyBasedShadingState *shadingState, + const PhysicallyBasedShadingState *state, const SurfaceHit *hit, const LightSample *lightSample, const vec3 *outgoingDir) { - const float NdotL = dot(shadingState->normal, lightSample->dir); - if (NdotL <= 0.0f) - return vec3(0.0f, 0.0f, 0.0f); - - const vec3 H = normalize(lightSample->dir + *outgoingDir); - const float NdotH = dot(shadingState->normal, H); + const vec3 N = state->normal; + const vec3 V = *outgoingDir; + const vec3 L = lightSample->dir; - const float NdotV = dot(shadingState->normal, *outgoingDir); - const float VdotH = dot(*outgoingDir, H); - const float LdotH = dot(lightSample->dir, H); + const float NdotL = dot(N, L); + if (NdotL <= 0.0f) + return vec3(0.0f); + + const vec3 H = normalize(L + V); + const float NdotH = fmaxf(dot(N, H), 0.0f); + const float NdotV = fmaxf(dot(N, V), 1e-6f); + const float VdotH = fmaxf(dot(V, H), 0.0f); + + // Base F0 / F90, optionally overridden by iridescence. + vec3 F0 = computeF0(state); + vec3 F90 = computeF90(state); + vec3 F = schlickFresnel(F0, F90, VdotH); + if (state->iridescence > 0.0f && state->iridescenceThickness > 0.0f) { + const vec3 iridescent = evalIridescence( + 1.0f, state->iridescenceIor, VdotH, state->iridescenceThickness, F0); + F = glm::mix(F, iridescent, state->iridescence); + } - // Fresnel - const vec3 f0 = glm::mix( - vec3(pow2((1.f - shadingState->ior) / (1.f + shadingState->ior))), - shadingState->baseColor, - shadingState->metallic); - const vec3 F = f0 + (vec3(1.f) - f0) * pow5(1.f - fabsf(VdotH)); + // Base GGX specular lobe. 
+ const float alpha = fmaxf(pow2(state->roughness), 1e-4f); + const float alpha2 = alpha * alpha; + const float D = ggxD(NdotH, alpha2); + const float G2 = smithG2GGX(NdotV, fmaxf(NdotL, 1e-6f), alpha2); + const vec3 specularBRDF = (F * D * G2) / (4.0f * NdotV * fmaxf(NdotL, 1e-6f)); - // Metallic materials don't reflect diffusely: + // Diffuse lobe (energy-balanced against specular, attenuated by occlusion + // and transmission; metals have no diffuse). const vec3 diffuseColor = - glm::mix(shadingState->baseColor, vec3(0.f), shadingState->metallic); + glm::mix(state->baseColor, vec3(0.0f), state->metallic); + const vec3 diffuseBRDF = (vec3(1.0f) - F) * float(M_1_PI) * diffuseColor + * state->occlusion * (1.0f - state->transmission); + + vec3 base = diffuseBRDF + specularBRDF; + + // Clearcoat: a second GGX lobe with its own normal and roughness, Fresnel- + // attenuating the base layer at both view and light angles. + if (state->clearcoat > 0.0f) { + const vec3 Nc = state->clearcoatNormal; + const float NcDotV = fmaxf(dot(Nc, V), 1e-6f); + const float NcDotL = fmaxf(dot(Nc, L), 0.0f); + const float NcDotH = fmaxf(dot(Nc, H), 0.0f); + const float FcV = + CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - NcDotV); + const float FcL = + CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - NcDotL); + const float alphaC = fmaxf(pow2(state->clearcoatRoughness), 1e-4f); + const float alphaC2 = alphaC * alphaC; + const float Dc = ggxD(NcDotH, alphaC2); + const float Gc = smithG2GGX(NcDotV, fmaxf(NcDotL, 1e-6f), alphaC2); + const float clearcoatLobe = + (FcV * Dc * Gc) / (4.0f * NcDotV * fmaxf(NcDotL, 1e-6f)); + + const float attnV = 1.0f - state->clearcoat * FcV; + const float attnL = 1.0f - state->clearcoat * FcL; + base = base * attnV * attnL; + base += + vec3(state->clearcoat * clearcoatLobe) * NcDotL / fmaxf(NdotL, 1e-6f); + } - const vec3 diffuseBRDF = - (vec3(1.f) - F) * float(M_1_PI) * diffuseColor * fmaxf(0.f, NdotL); + // Sheen: Charlie distribution + Ashikhmin 
visibility, added on top of the + // base layer without energy compensation (simple but consistent with the + // glTF reference for basic setups). + if (glm::any(glm::greaterThan(state->sheenColor, vec3(0.0f)))) { + const float alphaS = fmaxf(pow2(state->sheenRoughness), 1e-4f); + const float Ds = charlieD(NdotH, alphaS); + const float Vs = charlieV(NdotV, fmaxf(NdotL, 1e-6f)); + base += state->sheenColor * Ds * Vs; + } - // Alpha - const float alpha = pow2(shadingState->roughness) * shadingState->opacity; + return base * NdotL * lightSample->radiance / lightSample->pdf; +} - // GGX microfacet distribution - const float D = (alpha * alpha * heaviside(NdotH)) - / (float(M_PI) * pow2(NdotH * NdotH * (alpha * alpha - 1.f) + 1.f)); +//----------------------------------------------------------------------------- +// Next-ray importance sampling: stochastic alpha, Fresnel-aware lobe pick, +// GGX VNDF reflection/refraction. Clearcoat/sheen are NEE-only (no separate +// lobe sampling), which matches what the base renderer is set up to consume. +//----------------------------------------------------------------------------- - // Masking-shadowing term - const float G = - ((2.f * fabsf(NdotL) * heaviside(LdotH)) - / (fabsf(NdotL) - + sqrtf(alpha * alpha + (1.f - alpha * alpha) * NdotL * NdotL))) - * ((2.f * fabsf(NdotV) * heaviside(VdotH)) - / (fabsf(NdotV) - + sqrtf(alpha * alpha + (1.f - alpha * alpha) * NdotV * NdotV))); +VISRTX_CALLABLE NextRay __direct_callable__nextRay( + const PhysicallyBasedShadingState *state, const Ray *ray, RandState *rs) +{ + // Opacity pass-through (stochastic alpha): the ray continues unaltered. 
+ if (curand_uniform(rs) > state->opacity) + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; + + const vec3 N = state->normal; + const vec3 V = -ray->dir; + const mat3 toWorld = computeOrthonormalBasis(N); + const mat3 toLocal = glm::transpose(toWorld); + const vec3 Vlocal = toLocal * V; + if (Vlocal.z <= 0.0f) + return NextRay{N, vec3(0.0f)}; + + const float alpha = fmaxf(pow2(state->roughness), 1e-4f); + const float alpha2 = alpha * alpha; + const vec3 Hlocal = + sampleGGXVNDF(Vlocal, alpha, curand_uniform(rs), curand_uniform(rs)); + + const float NdotV = Vlocal.z; + const float VdotH = fmaxf(dot(Vlocal, Hlocal), 0.0f); + + // Fresnel at the sampled microfacet, with optional iridescence. + const vec3 F0 = computeF0(state); + const vec3 F90 = computeF90(state); + vec3 F = schlickFresnel(F0, F90, VdotH); + if (state->iridescence > 0.0f && state->iridescenceThickness > 0.0f) { + const vec3 iridescent = evalIridescence( + 1.0f, state->iridescenceIor, VdotH, state->iridescenceThickness, F0); + F = glm::mix(F, iridescent, state->iridescence); + } - const float denom = 4.f * fabsf(NdotV) * fabsf(NdotL); - const vec3 specularBRDF = denom != 0.f ? (F * D * G) / denom : vec3(0.f); + const vec3 Lrefl = glm::reflect(-Vlocal, Hlocal); + const float eta = state->ior; // init() pre-inverted for front-facing hits + const vec3 Ltrans = glm::refract(-Vlocal, Hlocal, eta); + const vec3 transmissionFilter = computeTransmissionFilter(state); + const bool hasTransmission = luminance(transmissionFilter) > 0.0f; + const bool totalInternalReflection = + hasTransmission && (glm::length(Ltrans) < 1e-6f || Ltrans.z >= 0.0f); + + vec3 reflectEnergy = totalInternalReflection ? vec3(1.0f) : F; + vec3 transmitEnergy = totalInternalReflection + ? 
vec3(0.0f) + : glm::max(vec3(1.0f) - F, vec3(0.0f)) * transmissionFilter; + + const float reflectStrength = + fmaxf(luminance(glm::max(reflectEnergy, vec3(0.0f))), 0.0f); + const float transmitStrength = + fmaxf(luminance(glm::max(transmitEnergy, vec3(0.0f))), 0.0f); + const float combinedStrength = reflectStrength + transmitStrength; + if (combinedStrength <= 0.0f) + return NextRay{N, vec3(0.0f)}; + + const float reflectProb = reflectStrength / combinedStrength; + const bool sampleTransmission = curand_uniform(rs) > reflectProb; + + if (sampleTransmission) { + const float NdotL = -Ltrans.z; // L points through the surface. + const float G1 = smithG1GGX(NdotV, alpha2); + const float G2 = smithG2GGX(NdotV, NdotL, alpha2); + const vec3 weight = transmitEnergy * (G2 / fmaxf(G1, 1e-8f)) + / fmaxf(1.0f - reflectProb, 1e-8f); + return NextRay{normalize(toWorld * Ltrans), + weight, + NEXT_RAY_CONTINUES_THROUGH_SURFACE}; + } - // Transmission is applied only to the diffuse BRDF. This is intentional: - // In this model, transmission reduces the diffuse reflection, while specular - // reflection (surface reflection) is not affected by transmission, as it - // represents light reflected at the surface rather than transmitted through - // the material. - return (diffuseBRDF * (1.0f - shadingState->transmission) + specularBRDF) - * NdotL * lightSample->radiance / lightSample->pdf; -} + // Reflection. + if (Lrefl.z <= 0.0f) + return NextRay{N, vec3(0.0f)}; -VISRTX_CALLABLE NextRay __direct_callable__nextRay( - const PhysicallyBasedShadingState *shadingState, - const Ray *ray, - RandState *rs) -{ - // Before anything, check for opacity. 
If below, then we just pass through - if (curand_uniform(rs) > shadingState->opacity) - return NextRay{ray->dir, vec3(1.0f)}; - - // Open cone, along the perfect reflection ray, with a metallic and - // roughness-dependent angle - const float roughness = shadingState->roughness; - const float metalness = shadingState->metallic; - const float roughnessSqr = roughness * roughness; - const float cosThetaMax = 1.0f - (roughnessSqr * roughnessSqr); - const float transmission = shadingState->transmission; - - bool isReflected = curand_uniform(rs) > transmission; - auto nextVector = isReflected - ? glm::reflect(ray->dir, shadingState->normal) - : glm::refract(ray->dir, shadingState->normal, shadingState->ior); - - auto nextRay = computeOrthonormalBasis(normalize(nextVector)) - * uniformSampleCone(cosThetaMax, - vec3(curand_uniform(rs), curand_uniform(rs), curand_uniform(rs))); - - auto nextSampleWeight = isReflected - ? shadingState->baseColor * metalness * (1.0f - transmission) - : shadingState->baseColor * transmission; - - return NextRay{nextRay, nextSampleWeight}; + const float NdotL = Lrefl.z; + const float G1 = smithG1GGX(NdotV, alpha2); + const float G2 = smithG2GGX(NdotV, NdotL, alpha2); + const vec3 weight = + reflectEnergy * (G2 / fmaxf(G1, 1e-8f)) / fmaxf(reflectProb, 1e-8f); + return NextRay{normalize(toWorld * Lrefl), weight}; } diff --git a/devices/rtx/device/renderer/Interactive_ptx.cu b/devices/rtx/device/renderer/Interactive_ptx.cu index aebcaf304..c75d0a0ad 100644 --- a/devices/rtx/device/renderer/Interactive_ptx.cu +++ b/devices/rtx/device/renderer/Interactive_ptx.cu @@ -33,6 +33,7 @@ #include "gpu/evalShading.h" #include "gpu/gpu_math.h" #include "gpu/gpu_objects.h" +#include "gpu/gpu_util.h" #include "gpu/intersectRay.h" #include "gpu/renderer/common.h" #include "gpu/renderer/raygen_helpers.h" @@ -124,13 +125,10 @@ struct InteractiveShadingPolicy NextRay nextRay = materialNextRay(shadingState, ray, ss.rs); if (glm::any(glm::greaterThan( 
nextRay.contributionWeight, glm::vec3(MIN_CONTRIBUTION_EPSILON)))) { + const float side = continuesThroughSurface(nextRay) ? -1.0f : 1.0f; Ray bounceRay = { - bounceHit.hitpoint - + bounceHit.Ng - * std::copysignf( - bounceHit.epsilon, dot(bounceHit.Ns, nextRay.direction)), - normalize(nextRay.direction), - }; + bounceHit.hitpoint + bounceHit.Ng * bounceHit.epsilon * side, + normalize(nextRay.direction)}; // Only check for intersecting surfaces and background as secondary light // interactions diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index eab5868da..fe319255c 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -388,13 +388,10 @@ VISRTX_GLOBAL void __raygen__() if (shouldTerminatePath(ss, d, sampleContribution, true)) break; - ray = Ray{ - surfaceHit.hitpoint - + surfaceHit.Ng - * std::copysignf(surfaceHit.epsilon, - dot(surfaceHit.Ns, nextRay.direction)), - normalize(vec3(nextRay.direction)), - }; + const float side = continuesThroughSurface(nextRay) ? 
-1.0f : 1.0f; + ray = + Ray{surfaceHit.hitpoint + surfaceHit.Ng * surfaceHit.epsilon * side, + normalize(vec3(nextRay.direction))}; } if (!surfaceHit.foundHit && !volumeSample.didScatter) { From a8cce70b05c43a07eeb8849e47439fd99de19bb1 Mon Sep 17 00:00:00 2001 From: Thomas Arcila <134677+tarcila@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:05:29 -0400 Subject: [PATCH 16/16] Address Copilot feedback --- devices/rtx/device/gpu/evalShading.h | 2 +- devices/rtx/device/light/sampling/CDF.cu | 34 +++++++++++++----------- devices/rtx/device/renderer/Renderer.cpp | 3 ++- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/devices/rtx/device/gpu/evalShading.h b/devices/rtx/device/gpu/evalShading.h index e02079884..2c8558a01 100644 --- a/devices/rtx/device/gpu/evalShading.h +++ b/devices/rtx/device/gpu/evalShading.h @@ -118,7 +118,7 @@ VISRTX_DEVICE NextRay materialNextRay( const MaterialShadingState &shadingState, const Ray &ray, RandState &rs) { if (shadingState.callableBaseIndex - == ~DeviceObjectIndex(0)) // No next ray by defaut + == ~DeviceObjectIndex(0)) // No next ray by default return NextRay{vec3(0.0f), vec3(0.0f)}; return optixDirectCall(shadingState.callableBaseIndex diff --git a/devices/rtx/device/light/sampling/CDF.cu b/devices/rtx/device/light/sampling/CDF.cu index 39f7c63cc..3dfcedea2 100644 --- a/devices/rtx/device/light/sampling/CDF.cu +++ b/devices/rtx/device/light/sampling/CDF.cu @@ -129,27 +129,29 @@ void computeConditionalCDFs( } } -void normalizeMarginalCDF(float *marginalCdf, int height) +void normalizeCDF(thrust::device_ptr cdf, int n) { - using thrust::device_pointer_cast; + const float total = cdf[n - 1]; + if (total > 0.0f) { + thrust::transform( + cdf, cdf + n, cdf, [total] __device__(float x) { return x / total; }); + } else { + // Empty distribution; fill with uniform values so sampling doesn't walk off + // the end. 
+ thrust::fill(cdf, cdf + n, 1.0f); + } +} - auto cdf = device_pointer_cast(marginalCdf); - thrust::transform( - cdf, cdf + height, cdf, [total = cdf[height - 1]] __device__(float x) { - return x / total; - }); +void normalizeMarginalCDF(float *marginalCdf, int height) +{ + normalizeCDF(thrust::device_pointer_cast(marginalCdf), height); } void normalizeConditionalCDFs(float *d_conditional_cdf, int width, int height) { - using thrust::device_pointer_cast; - for (int y = 0; y < height; ++y) { - auto cdfRow = device_pointer_cast(d_conditional_cdf + y * width); - thrust::transform(cdfRow, - cdfRow + width, - cdfRow, - [total = cdfRow[width - 1]] __device__(float x) { return x / total; }); + normalizeCDF( + thrust::device_pointer_cast(d_conditional_cdf + y * width), width); } } @@ -186,9 +188,11 @@ float generateCDFTables(const float *luminanceImage, // Equirectangular Jacobian |dω/d(u,v)| = 2π²·sinθ; the sinθ weighting is // already folded into the CDF luminance, so the per-pixel area factor is // 2π²/(W·H) and pdf_ω = (L/totalL) · (W·H)/(2π²). + // A zero-luminance map produces an inf weight; return 0 instead. const float equirectJacobian = 2.0f * float(M_PI) * float(M_PI) / (width * height); - float weight = 1.0f / (totalLuminance * equirectJacobian); + const float weight = + totalLuminance > 0.0f ? 
1.0f / (totalLuminance * equirectJacobian) : 0.0f; // Normalize both tables normalizeMarginalCDF(marginalCdf->ptrAs(), height); diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp index 971ecbcc4..6e7bba2cb 100644 --- a/devices/rtx/device/renderer/Renderer.cpp +++ b/devices/rtx/device/renderer/Renderer.cpp @@ -170,7 +170,8 @@ void Renderer::commitParameters() m_cullTriangleBF = getParam("cullTriangleBackfaces", false); m_volumeSamplingRate = std::clamp(getParam("volumeSamplingRate", 0.125f), 1e-3f, 10.f); - m_premultipliedAlpha = getParam("premultipliedAlpha", false); + m_premultipliedAlpha = getParam( + "premultipliedAlpha", getParam("premultiplyBackground", false)); m_cutPlane = getParam("cutPlane", vec4(0.f)); if (m_checkerboard) m_spp = 1;