diff --git a/devices/rtx/device/frame/Denoiser.cu b/devices/rtx/device/frame/Denoiser.cu index 86668d807..328ca30ef 100644 --- a/devices/rtx/device/frame/Denoiser.cu +++ b/devices/rtx/device/frame/Denoiser.cu @@ -50,15 +50,16 @@ Denoiser::~Denoiser() } void Denoiser::setup(uvec2 size, - HostDeviceArray &pixelBuffer, + HostDeviceArray &outputBuffer, ANARIDataType format, - DeviceBuffer &accumAlbedo, - DeviceBuffer &accumNormal) + DeviceBuffer &input, + DeviceBuffer &albedo, + DeviceBuffer &normal) { - init(accumAlbedo, accumNormal); + init(albedo, normal); auto &state = *deviceState(); - m_pixelBuffer = &pixelBuffer; + m_pixelBuffer = &outputBuffer; m_format = format; @@ -83,22 +84,24 @@ void Denoiser::setup(uvec2 size, (CUdeviceptr)m_scratch.ptr(), m_scratch.bytes())); - m_layer.input.data = (CUdeviceptr)pixelBuffer.dataDevice(); + m_layer.input.data = (CUdeviceptr)input.ptr(); m_layer.input.width = size.x; m_layer.input.height = size.y; m_layer.input.pixelStrideInBytes = 0; m_layer.input.rowStrideInBytes = 4 * sizeof(float) * size.x; m_layer.input.format = OPTIX_PIXEL_FORMAT_FLOAT4; - std::memcpy(&m_layer.output, &m_layer.input, sizeof(m_layer.output)); - m_guideLayer.albedo.data = (CUdeviceptr)accumAlbedo.ptr(); + m_layer.output = m_layer.input; + m_layer.output.data = (CUdeviceptr)outputBuffer.dataDevice(); + + m_guideLayer.albedo.data = (CUdeviceptr)albedo.ptr(); m_guideLayer.albedo.width = size.x; m_guideLayer.albedo.height = size.y; m_guideLayer.albedo.pixelStrideInBytes = 3 * sizeof(float); m_guideLayer.albedo.rowStrideInBytes = 3 * sizeof(float) * size.x; m_guideLayer.albedo.format = OPTIX_PIXEL_FORMAT_FLOAT3; - m_guideLayer.normal.data = (CUdeviceptr)accumNormal.ptr(); + m_guideLayer.normal.data = (CUdeviceptr)normal.ptr(); m_guideLayer.normal.width = size.x; m_guideLayer.normal.height = size.y; m_guideLayer.normal.pixelStrideInBytes = 3 * sizeof(float); @@ -130,30 +133,33 @@ void Denoiser::launch() (CUdeviceptr)m_scratch.ptr(), 
static_cast(m_scratch.bytes()))); instrument::rangePop(); // optixDenoiserInvoke() +} - if (m_format != ANARI_FLOAT32_VEC4) { - instrument::rangePush("denoiser transform pixels"); - auto numPixels = - size_t(m_layer.output.width) * size_t(m_layer.output.height); - auto begin = thrust::device_ptr((vec4 *)m_pixelBuffer->dataDevice()); - auto end = begin + numPixels; - if (m_format == ANARI_UFIXED8_RGBA_SRGB) { - thrust::transform(thrust::cuda::par.on(state.stream), - begin, - end, - thrust::device_pointer_cast(m_uintPixels.dataDevice()), - [] __device__(const vec4 &in) { - return glm::packUnorm4x8(glm::convertLinearToSRGB(in)); - }); - } else { - thrust::transform(thrust::cuda::par.on(state.stream), - begin, - end, - thrust::device_pointer_cast(m_uintPixels.dataDevice()), - [] __device__(const vec4 &in) { return glm::packUnorm4x8(in); }); - } - instrument::rangePop(); // denoiser transform pixels +void Denoiser::convertOutput() +{ + if (m_format == ANARI_FLOAT32_VEC4) + return; + auto &state = *deviceState(); + instrument::rangePush("denoiser transform pixels"); + auto numPixels = size_t(m_layer.output.width) * size_t(m_layer.output.height); + auto begin = thrust::device_ptr((vec4 *)m_pixelBuffer->dataDevice()); + auto end = begin + numPixels; + if (m_format == ANARI_UFIXED8_RGBA_SRGB) { + thrust::transform(thrust::cuda::par.on(state.stream), + begin, + end, + thrust::device_pointer_cast(m_uintPixels.dataDevice()), + [] __device__(const vec4 &in) { + return glm::packUnorm4x8(glm::convertLinearToSRGB(in)); + }); + } else { + thrust::transform(thrust::cuda::par.on(state.stream), + begin, + end, + thrust::device_pointer_cast(m_uintPixels.dataDevice()), + [] __device__(const vec4 &in) { return glm::packUnorm4x8(in); }); } + instrument::rangePop(); // denoiser transform pixels } void *Denoiser::mapColorBuffer() @@ -185,7 +191,6 @@ void Denoiser::init( m_denoiser = {}; } - auto &state = *deviceState(); m_usingAlbedo = useAlbedo; m_usingNormal = useNormal; @@ -193,10 
+198,13 @@ void Denoiser::init( options.guideAlbedo = m_usingAlbedo; options.guideNormal = m_usingNormal; - OPTIX_CHECK(optixDenoiserCreate(state.optixContext, - OPTIX_DENOISER_MODEL_KIND_AOV, - &options, - &m_denoiser)); + if (!m_denoiser) { + auto &state = *deviceState(); + OPTIX_CHECK(optixDenoiserCreate(state.optixContext, + OPTIX_DENOISER_MODEL_KIND_AOV, + &options, + &m_denoiser)); + } } -} // namespace visrtx \ No newline at end of file +} // namespace visrtx diff --git a/devices/rtx/device/frame/Denoiser.h b/devices/rtx/device/frame/Denoiser.h index 541d9b2c9..2196ebd10 100644 --- a/devices/rtx/device/frame/Denoiser.h +++ b/devices/rtx/device/frame/Denoiser.h @@ -41,12 +41,16 @@ struct Denoiser : public Object Denoiser(DeviceGlobalState *s); ~Denoiser() override; - void setup( - uvec2 size, HostDeviceArray &pixelBuffer, ANARIDataType format, - DeviceBuffer &accumAlbedo, DeviceBuffer &accumNormal); + void setup(uvec2 size, + HostDeviceArray &outputBuffer, + ANARIDataType format, + DeviceBuffer &input, + DeviceBuffer &albedo, + DeviceBuffer &normal); void cleanup(); void launch(); + void convertOutput(); void *mapColorBuffer(); void *mapGPUColorBuffer(); diff --git a/devices/rtx/device/frame/Frame.cu b/devices/rtx/device/frame/Frame.cu index 2aa877979..71a2ba9ae 100644 --- a/devices/rtx/device/frame/Frame.cu +++ b/devices/rtx/device/frame/Frame.cu @@ -30,9 +30,12 @@ */ #include "Frame.h" +#include "gpu/gpu_tonemap.h" +#include "gpu/gpu_util.h" #include "utility/instrument.h" // std #include +#include #include // thrust #include @@ -42,6 +45,222 @@ namespace visrtx { +namespace { + +// Resolve per-pixel (sourceIdx, divisor) for the current sub-frame. Mirrors +// compositeBackground so both kernels agree on which accumulator sample count +// and source pixel to read under checkerboarding. 
+__device__ bool resolveSample(uint32_t idx, + uvec2 size, + int frameID, + int checkerboardID, + uint32_t &sourceIdx, + int &divisor) +{ + sourceIdx = idx; + divisor = frameID; + if (checkerboardID >= 0 && checkerboardID < 3) { + const uint32_t px = idx % size.x; + const uint32_t py = idx / size.x; + const int pixTile = (px & 1) | ((py & 1) << 1); + if (pixTile <= checkerboardID) { + divisor = frameID + 1; + } else if (frameID == 0) { + sourceIdx = (px & ~1u) + (py & ~1u) * size.x; + divisor = 1; + } + } + return divisor > 0; +} + +__global__ void prepareDenoiseInput(const vec4 *__restrict__ accumColor, + vec4 *__restrict__ denoiseInput, + uvec2 size, + int frameID, + int checkerboardID, + bool fireflyFilter) +{ + const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size.x * size.y) + return; + + uint32_t srcIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, srcIdx, divisor)) { + denoiseInput[idx] = vec4(0.f); + return; + } + + vec4 c = accumColor[srcIdx] / float(divisor); + if (fireflyFilter) + c = detail::inverseTonemap(c); + denoiseInput[idx] = c; +} + +void launchPrepareDenoiseInput(const vec4 *accumColor, + vec4 *denoiseInput, + uvec2 size, + int frameID, + int checkerboardID, + bool fireflyFilter, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + prepareDenoiseInput<<>>( + accumColor, denoiseInput, size, frameID, checkerboardID, fireflyFilter); +} + +__global__ void prepareDenoiseGuides(const vec3 *__restrict__ accumAlbedo, + const vec3 *__restrict__ accumNormal, + vec3 *__restrict__ denoiseAlbedo, + vec3 *__restrict__ denoiseNormal, + uvec2 size, + int frameID, + int checkerboardID) +{ + const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size.x * size.y) + return; + + uint32_t srcIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, srcIdx, 
divisor)) { + if (denoiseAlbedo) + denoiseAlbedo[idx] = vec3(0.f); + if (denoiseNormal) + denoiseNormal[idx] = vec3(0.f); + return; + } + + const float invDivisor = 1.0f / float(divisor); + if (denoiseAlbedo) + denoiseAlbedo[idx] = accumAlbedo[srcIdx] * invDivisor; + + if (denoiseNormal) { + const vec3 n = accumNormal[srcIdx]; + const float len = glm::length(n); + constexpr float NORMAL_EPSILON = 1e-6f; + denoiseNormal[idx] = len > NORMAL_EPSILON ? n * (1.0f / len) : vec3(0.f); + } +} + +void launchPrepareDenoiseGuides(const vec3 *accumAlbedo, + const vec3 *accumNormal, + vec3 *denoiseAlbedo, + vec3 *denoiseNormal, + uvec2 size, + int frameID, + int checkerboardID, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + prepareDenoiseGuides<<>>(accumAlbedo, + accumNormal, + denoiseAlbedo, + denoiseNormal, + size, + frameID, + checkerboardID); +} + +__global__ void compositeBackground(vec4 *__restrict__ accumColor, + vec4 *__restrict__ pixelBuf, + uint32_t *__restrict__ uintBuf, + RendererGPUData renderer, + uvec2 size, + vec2 invSize, + FrameFormat format, + int frameID, + int checkerboardID, + bool isDenoised) +{ + const uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= size.x * size.y) + return; + + uint32_t sourceIdx; + int divisor; + if (!resolveSample(idx, size, frameID, checkerboardID, sourceIdx, divisor)) + return; + + const uint32_t px = idx % size.x; + const uint32_t py = idx / size.x; + + vec4 rendered; + if (isDenoised) { + // The denoiser fills pixelBuf at every pixel, so reading from sourceIdx + // would race against another thread compositing into that same slot. + // Read RGB from this thread's own pixel; only the alpha needs the + // checkerboard source redirect because accumColor is sparse. 
+ rendered = pixelBuf[idx]; + rendered.a = accumColor[sourceIdx].a / float(divisor); + } else { + rendered = accumColor[sourceIdx] / float(divisor); + if (renderer.fireflyFilter) + rendered = detail::inverseTonemap(rendered); + } + + const vec2 uv = (vec2(px, py) + 0.5f) * invSize; + + vec4 bg; + if (renderer.backgroundMode == BackgroundMode::COLOR) { + bg = renderer.background.color; + } else { + const auto s = tex2D(renderer.background.texobj, uv.x, uv.y); + bg = vec4(s.x, s.y, s.z, s.w); + } + + vec3 rgb = vec3(rendered); + float alpha = rendered.a; + accumulateValue(rgb, vec3(bg) * bg.a, alpha); + accumulateValue(alpha, bg.a, alpha); + + if (!renderer.premultipliedAlpha && alpha > 0.0f) + rgb *= 1.0f / alpha; + + vec4 rgba = vec4(rgb, alpha); + if (format == FrameFormat::SRGB) { + uintBuf[idx] = glm::packUnorm4x8(glm::convertLinearToSRGB(rgba)); + } else if (format == FrameFormat::UINT) { + uintBuf[idx] = glm::packUnorm4x8(rgba); + } else { + pixelBuf[idx] = rgba; + } +} + +void launchCompositeBackground(vec4 *accumColor, + vec4 *pixelBuf, + uint32_t *uintBuf, + const RendererGPUData &renderer, + uvec2 size, + vec2 invSize, + FrameFormat format, + int frameID, + int checkerboardID, + bool isDenoised, + cudaStream_t stream) +{ + const uint32_t nPixels = size.x * size.y; + const uint32_t blockSize = 256; + const uint32_t gridSize = (nPixels + blockSize - 1) / blockSize; + compositeBackground<<>>(accumColor, + pixelBuf, + uintBuf, + renderer, + size, + invSize, + format, + frameID, + checkerboardID, + isDenoised); +} + +} // anonymous namespace + Frame::Frame(DeviceGlobalState *d) : helium::BaseFrame(d), m_denoiser(d) { cudaEventCreate(&m_eventStart); @@ -111,12 +330,6 @@ void Frame::finalize() auto &hd = data(); const bool useFloatFB = m_denoise || m_colorType == ANARI_FLOAT32_VEC4; - if (useFloatFB) - hd.fb.format = FrameFormat::FLOAT; - else if (m_colorType == ANARI_UFIXED8_RGBA_SRGB) - hd.fb.format = FrameFormat::SRGB; - else - hd.fb.format = 
FrameFormat::UINT; hd.fb.invSize = 1.f / vec2(hd.fb.size); @@ -153,15 +366,23 @@ void Frame::finalize() else m_accumNormal.reset(); - hd.fb.buffers.colorAccumulation = m_accumColor.ptrAs(); - - hd.fb.buffers.outColorVec4 = nullptr; - hd.fb.buffers.outColorUint = nullptr; + if (m_denoise) { + m_denoiseInput.reserve(numPixels() * sizeof(vec4)); + if (m_denoiseUsingAlbedo) + m_denoiseAlbedo.reserve(numPixels() * sizeof(vec3)); + else + m_denoiseAlbedo.reset(); + if (m_denoiseUsingNormal) + m_denoiseNormal.reserve(numPixels() * sizeof(vec3)); + else + m_denoiseNormal.reset(); + } else { + m_denoiseInput.reset(); + m_denoiseAlbedo.reset(); + m_denoiseNormal.reset(); + } - if (useFloatFB) - hd.fb.buffers.outColorVec4 = (vec4 *)m_pixelBuffer.dataDevice(); - else - hd.fb.buffers.outColorUint = (uint32_t *)m_pixelBuffer.dataDevice(); + hd.fb.buffers.colorAccumulation = m_accumColor.ptrAs(); hd.fb.buffers.depth = channelDepth ? m_depthBuffer.dataDevice() : nullptr; hd.fb.buffers.primID = channelPrimID ? m_primIDBuffer.dataDevice() : nullptr; @@ -171,8 +392,12 @@ void Frame::finalize() hd.fb.buffers.normal = channelNormal ? 
m_accumNormal.ptrAs() : nullptr; if (m_denoise) - m_denoiser.setup( - hd.fb.size, m_pixelBuffer, m_colorType, m_accumAlbedo, m_accumNormal); + m_denoiser.setup(hd.fb.size, + m_pixelBuffer, + m_colorType, + m_denoiseInput, + m_denoiseAlbedo, + m_denoiseNormal); else m_denoiser.cleanup(); @@ -334,9 +559,60 @@ void Frame::renderFrame() else hd.fb.frameID += m_renderer->spp(); - if (m_denoise) + const bool useFloatOutput = m_denoise || m_colorType == ANARI_FLOAT32_VEC4; + + if (m_denoise) { + launchPrepareDenoiseInput(m_accumColor.ptrAs(), + m_denoiseInput.ptrAs(), + hd.fb.size, + hd.fb.frameID, + hd.fb.checkerboardID, + hd.renderer.fireflyFilter, + state.stream); + + if (m_denoiseUsingAlbedo || m_denoiseUsingNormal) { + launchPrepareDenoiseGuides(m_accumAlbedo.ptrAs(), + m_accumNormal.ptrAs(), + m_denoiseAlbedo.ptrAs(), + m_denoiseNormal.ptrAs(), + hd.fb.size, + hd.fb.frameID, + hd.fb.checkerboardID, + state.stream); + } + m_denoiser.launch(); + launchCompositeBackground(m_accumColor.ptrAs(), + (vec4 *)m_pixelBuffer.dataDevice(), + nullptr, + hd.renderer, + hd.fb.size, + hd.fb.invSize, + FrameFormat::FLOAT, + hd.fb.frameID, + hd.fb.checkerboardID, + /*isDenoised=*/true, + state.stream); + + m_denoiser.convertOutput(); + } else { + const FrameFormat outFormat = useFloatOutput ? FrameFormat::FLOAT + : m_colorType == ANARI_UFIXED8_RGBA_SRGB ? FrameFormat::SRGB + : FrameFormat::UINT; + launchCompositeBackground(m_accumColor.ptrAs(), + useFloatOutput ? (vec4 *)m_pixelBuffer.dataDevice() : nullptr, + useFloatOutput ? 
nullptr : (uint32_t *)m_pixelBuffer.dataDevice(), + hd.renderer, + hd.fb.size, + hd.fb.invSize, + outFormat, + hd.fb.frameID, + hd.fb.checkerboardID, + /*isDenoised=*/false, + state.stream); + } + if (m_callback) { cudaLaunchHostFunc( state.stream, diff --git a/devices/rtx/device/frame/Frame.h b/devices/rtx/device/frame/Frame.h index 6acbbc4d0..ba8236cde 100644 --- a/devices/rtx/device/frame/Frame.h +++ b/devices/rtx/device/frame/Frame.h @@ -35,8 +35,8 @@ #include "camera/Camera.h" #include "gpu/gpu_objects.h" #include "renderer/Renderer.h" -#include "world/World.h" #include "utility/DeviceObject.h" +#include "world/World.h" // helium #include "helium/BaseFrame.h" // std @@ -122,6 +122,13 @@ struct Frame : public helium::BaseFrame, public DeviceObject DeviceBuffer m_accumAlbedo; // vec3 DeviceBuffer m_accumNormal; // vec3 + // Per-pixel pre-denoise estimates. Keeping these separate from pixelBuffer + // avoids the denoiser reading its own previous output on non-rendered + // checkerboard pixels (which cycles-4 flicker at edges). 
+ DeviceBuffer m_denoiseInput; // vec4 + DeviceBuffer m_denoiseAlbedo; // vec3 + DeviceBuffer m_denoiseNormal; // vec3 + helium::IntrusivePtr m_renderer; helium::IntrusivePtr m_camera; helium::IntrusivePtr m_world; diff --git a/devices/rtx/device/gpu/evalShading.h b/devices/rtx/device/gpu/evalShading.h index e9cd7f6b9..2c8558a01 100644 --- a/devices/rtx/device/gpu/evalShading.h +++ b/devices/rtx/device/gpu/evalShading.h @@ -81,14 +81,15 @@ VISRTX_DEVICE float materialEvaluateOpacity( } VISRTX_DEVICE vec3 materialEvaluateEmission( - const MaterialShadingState &shadingState, const vec3& outgoingDir) + const MaterialShadingState &shadingState, const vec3 &outgoingDir) { if (shadingState.callableBaseIndex == ~DeviceObjectIndex(0)) return vec3(0.0f, 0.0f, 0.0f); // Default emission color return optixDirectCall(shadingState.callableBaseIndex + int(SurfaceShaderEntryPoints::EvaluateEmission), - &shadingState.data, &outgoingDir); + &shadingState.data, + &outgoingDir); } VISRTX_DEVICE vec3 materialEvaluateTransmission( @@ -113,11 +114,12 @@ VISRTX_DEVICE vec3 materialEvaluateNormal( &shadingState.data); } -VISRTX_DEVICE NextRay materialNextRay(const MaterialShadingState &shadingState, - const Ray &ray, RandState& rs) +VISRTX_DEVICE NextRay materialNextRay( + const MaterialShadingState &shadingState, const Ray &ray, RandState &rs) { - if (shadingState.callableBaseIndex == ~DeviceObjectIndex(0)) // No next ray by defaut - return NextRay{vec4(0.0f), vec4(0.0f)}; + if (shadingState.callableBaseIndex + == ~DeviceObjectIndex(0)) // No next ray by default + return NextRay{vec3(0.0f), vec3(0.0f)}; return optixDirectCall(shadingState.callableBaseIndex + int(SurfaceShaderEntryPoints::EvaluateNextRay), diff --git a/devices/rtx/device/gpu/gpu_objects.h b/devices/rtx/device/gpu/gpu_objects.h index 549daaadf..f1197856a 100644 --- a/devices/rtx/device/gpu/gpu_objects.h +++ b/devices/rtx/device/gpu/gpu_objects.h @@ -363,6 +363,23 @@ struct MaterialGPUData MaterialParameter transmission; 
float ior; + + // KHR_materials_* extensions + DeviceObjectIndex occlusionSampler; + MaterialParameter specular; + MaterialParameter specularColor; + uint32_t useSpecular; + MaterialParameter clearcoat; + MaterialParameter clearcoatRoughness; + DeviceObjectIndex clearcoatNormalSampler; + MaterialParameter thickness; + float attenuationDistance; + vec3 attenuationColor; + MaterialParameter sheenColor; + MaterialParameter sheenRoughness; + MaterialParameter iridescence; + float iridescenceIor; + MaterialParameter iridescenceThickness; }; struct MDL @@ -715,7 +732,7 @@ struct RendererGPUData float inverseVolumeSamplingRate; float occlusionDistance; bool cullTriangleBF; - bool premultiplyBackground; + bool premultipliedAlpha; bool fireflyFilter; // enable internal tonemapping during sample accumulation glm::vec4 cutPlane; // cutting plane (nx,ny,nz,d); disabled when all zero (GPU // default) @@ -734,8 +751,6 @@ enum class FrameFormat struct FrameBuffers { glm::vec4 *colorAccumulation; - glm::vec4 *outColorVec4; - uint32_t *outColorUint; float *depth; uint32_t *primID; uint32_t *objID; @@ -750,7 +765,6 @@ struct FramebufferGPUData int frameID; int checkerboardID; float invFrameID; - FrameFormat format; glm::uvec2 size; glm::vec2 invSize; }; diff --git a/devices/rtx/device/gpu/gpu_tonemap.h b/devices/rtx/device/gpu/gpu_tonemap.h new file mode 100644 index 000000000..25f876158 --- /dev/null +++ b/devices/rtx/device/gpu/gpu_tonemap.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +// Tonemap helpers — safe to include from both PTX and regular CUDA sources. +// gpu_util.h includes and cannot be used from Frame.cu; +// this header provides the subset needed by the compositing kernel. 
+#pragma once + +#include "gpu_math.h" +// glm +#include +#include +#include + +namespace visrtx { +namespace detail { + +VISRTX_DEVICE vec3 tonemap(vec3 v) +{ + return v / (1.0f + glm::max(0.0f, compMax(v))); +} + +VISRTX_DEVICE vec3 inverseTonemap(vec3 v) +{ + return v / glm::max(1e-12f, 1.f - compMax(v)); +} + +VISRTX_DEVICE vec4 tonemap(vec4 v) +{ + return vec4(tonemap(vec3(v)), v.w); +} + +VISRTX_DEVICE vec4 inverseTonemap(vec4 v) +{ + return vec4(inverseTonemap(vec3(v)), v.w); +} + +} // namespace detail +} // namespace visrtx diff --git a/devices/rtx/device/gpu/gpu_util.h b/devices/rtx/device/gpu/gpu_util.h index 9ea8b894e..9e17c44ba 100644 --- a/devices/rtx/device/gpu/gpu_util.h +++ b/devices/rtx/device/gpu/gpu_util.h @@ -34,6 +34,7 @@ #include "cameraCreateRay.h" #include "gpu/gpu_debug.h" #include "gpu_objects.h" +#include "shadingState.h" // optix #include // std @@ -44,6 +45,7 @@ #include // cuda #include +#include "gpu_tonemap.h" #ifndef __CUDACC__ #error "gpu_util.h can only be included in device code" @@ -273,6 +275,11 @@ VISRTX_DEVICE bool isMiddelPixel( return pixel.x == (fb.size.x / 2) && pixel.y == (fb.size.y / 2); } +VISRTX_DEVICE bool continuesThroughSurface(const NextRay &nextRay) +{ + return (nextRay.flags & NEXT_RAY_CONTINUES_THROUGH_SURFACE) != 0u; +} + VISRTX_DEVICE vec3 sampleHDRI(const LightGPUData &ld, const vec2 &uv) { return vec3(make_vec4(tex2D<::float4>(ld.hdri.radiance, uv.x, uv.y))); @@ -293,14 +300,6 @@ VISRTX_DEVICE vec3 sampleHDRI(const LightGPUData &ld, const vec3 &rayDir) return sampleHDRI(ld, vec2(u, v)) * ld.hdri.scale; } -VISRTX_DEVICE vec4 getBackgroundImage( - const RendererGPUData &rd, const vec2 &loc) -{ - return rd.backgroundMode == BackgroundMode::COLOR - ? 
rd.background.color - : make_vec4(tex2D<::float4>(rd.background.texobj, loc.x, loc.y)); -} - VISRTX_DEVICE bool getBackgroundLight( const FrameGPUData &fd, const vec3 &rayDir, vec3 &outRadiance) { @@ -324,18 +323,6 @@ VISRTX_DEVICE bool getBackgroundLight( return hasVisibleHDRI; } -VISRTX_DEVICE vec4 getBackground( - const FrameGPUData &fd, const vec2 &loc, const vec3 &rayDir) -{ - vec3 hdriContribution; - const bool hasVisibleHDRI = getBackgroundLight(fd, rayDir, hdriContribution); - if (hasVisibleHDRI) - return vec4(hdriContribution, 1.f); - - // No visible HDRI, use background image/color - return getBackgroundImage(fd.renderer, loc); -} - VISRTX_DEVICE uint32_t computeGeometryPrimId(const SurfaceHit &hit) { if (!hit.foundHit) @@ -350,30 +337,6 @@ VISRTX_DEVICE uint32_t computeGeometryPrimId(const SurfaceHit &hit) namespace detail { -VISRTX_DEVICE -vec3 tonemap(vec3 v) -{ - return v / (1.0f + max(0.0f, compMax(v))); -} - -VISRTX_DEVICE -vec3 inverseTonemap(vec3 v) -{ - return v / max(1e-12f, 1.f - compMax(v)); -} - -VISRTX_DEVICE -vec4 tonemap(vec4 v) -{ - return vec4(tonemap(vec3(v)), v.w); -} - -VISRTX_DEVICE -vec4 inverseTonemap(vec4 v) -{ - return vec4(inverseTonemap(vec3(v)), v.w); -} - template VISRTX_DEVICE void accumValue(T *arr, size_t idx, const T &v) { @@ -402,18 +365,6 @@ VISRTX_DEVICE uint32_t pixelIndex( return pixel.x + pixel.y * fb.size.x; } -VISRTX_DEVICE void writeOutputColor( - const FramebufferGPUData &fb, const vec4 &color, const uint32_t idx) -{ - if (fb.format == FrameFormat::SRGB) { - fb.buffers.outColorUint[idx] = - glm::packUnorm4x8(glm::convertLinearToSRGB(color)); - } else if (fb.format == FrameFormat::UINT) - fb.buffers.outColorUint[idx] = glm::packUnorm4x8(color); - else - fb.buffers.outColorVec4[idx] = color; -} - } // namespace detail VISRTX_DEVICE void setPixelIds(const FramebufferGPUData &fb, @@ -454,48 +405,16 @@ VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame, const uvec2 &pixel, const vec4 &color, const vec3 
&albedo, - const vec3 &normal, - const int frameIDOffset = 0) + const vec3 &normal) { const auto &fb = frame.fb; const uint32_t idx = detail::pixelIndex(fb, pixel); - const auto frameID = fb.frameID + frameIDOffset; - - // Conditionally apply tonemapping during accumulation - if (frame.renderer.fireflyFilter) - detail::accumValue( - fb.buffers.colorAccumulation, idx, detail::tonemap(color)); - else - detail::accumValue(fb.buffers.colorAccumulation, idx, color); + + detail::accumValue(fb.buffers.colorAccumulation, + idx, + frame.renderer.fireflyFilter ? detail::tonemap(color) : color); detail::accumValue(fb.buffers.albedo, idx, albedo); detail::accumValue(fb.buffers.normal, idx, normal); - - const auto accumColor = fb.buffers.colorAccumulation[idx]; - // Conditionally apply inverse tonemapping on output - const float frameDivisor = float(fb.frameID + frameIDOffset + 1); - const auto normalizedColor = accumColor / frameDivisor; - const auto outputColor = frame.renderer.fireflyFilter - ? detail::inverseTonemap(normalizedColor) - : normalizedColor; - - detail::writeOutputColor(fb, outputColor, idx); - - if (fb.checkerboardID == 0 && frameID == 0) { - auto adjPix = uvec2(pixel.x + 1, pixel.y + 0); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - - adjPix = uvec2(pixel.x + 0, pixel.y + 1); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - - adjPix = uvec2(pixel.x + 1, pixel.y + 1); - if (!pixelOutOfFrame(adjPix, fb)) { - detail::writeOutputColor(fb, outputColor, detail::pixelIndex(fb, adjPix)); - } - } } } // namespace visrtx diff --git a/devices/rtx/device/gpu/renderer/raygen_helpers.h b/devices/rtx/device/gpu/renderer/raygen_helpers.h index 8069090f3..5f65d6405 100644 --- a/devices/rtx/device/gpu/renderer/raygen_helpers.h +++ b/devices/rtx/device/gpu/renderer/raygen_helpers.h @@ -65,7 +65,9 @@ VISRTX_DEVICE float 
volumeAttenuation(ScreenSample &ss, const Ray &r) VISRTX_DEVICE float evaluateOpacity(const MaterialShadingState &shadingState) { return materialEvaluateOpacity(shadingState) - * (1.0f - glm::luminosity(materialEvaluateTransmission(shadingState))); + * (1.0f + - glm::luminosity(materialEvaluateTransmission(shadingState)) + * 0.85f); } // Templated rendering loop @@ -202,21 +204,19 @@ VISRTX_DEVICE void renderPixel(FrameGPUData &frameData, ScreenSample ss) // Otherwise, continue through transparent surface } - // Accumulate background for remaining transparency - const auto bg = getBackground(frameData, ss.screen, ray.dir); - const bool premultiplyBg = rendererParams.premultiplyBackground; - vec3 bgColor = premultiplyBg ? vec3(bg) * bg.a : vec3(bg); - - accumulateValue(outputColor, bgColor, outputOpacity); - accumulateValue(outputOpacity, bg.a, outputOpacity); + // Accumulate HDRI sky — marks sky pixels as opaque so the background + // compositing pass does not bleed through HDRI-covered pixels. 
+ if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + accumulateValue(outputColor, hdri, outputOpacity); + accumulateValue(outputOpacity, 1.f, outputOpacity); + } // Write accumulated sample to framebuffer accumPixelSample(frameData, ss.pixel, vec4(outputColor, outputOpacity), outputAlbedo, - outputNormal, - i); + outputNormal); } } diff --git a/devices/rtx/device/gpu/sampleLight.h b/devices/rtx/device/gpu/sampleLight.h index e54e3bf03..14bbe64cf 100644 --- a/devices/rtx/device/gpu/sampleLight.h +++ b/devices/rtx/device/gpu/sampleLight.h @@ -62,10 +62,10 @@ namespace visrtx { // Light sampling result containing direction, distance, radiance and PDF struct LightSample { - vec3 radiance; // Emitted radiance in direction of hit point (W⋅sr⁻¹⋅m⁻²) - vec3 dir; // Unit direction vector from hit point to light sample - float dist; // Distance from hit point to light sample - float pdf; // Probability density function value for this sample + vec3 radiance; // Emitted radiance in direction of hit point (W⋅sr⁻¹⋅m⁻²) + vec3 dir; // Unit direction vector from hit point to light sample + float dist; // Distance from hit point to light sample + float pdf; // Probability density function value for this sample }; namespace detail { @@ -74,8 +74,8 @@ VISRTX_DEVICE LightSample sampleDirectionalLight( const LightGPUData &ld, const mat4 &xfm) { LightSample ls; - // Transform light direction to world space and negate to get direction TO light - // (ld.distant.direction points FROM the light source) + // Transform light direction to world space and negate to get direction TO + // light (ld.distant.direction points FROM the light source) ls.dir = xfmVec(xfm, -ld.distant.direction); ls.dist = std::numeric_limits::infinity(); // For directional lights, irradiance is the amount of light per unit area @@ -135,16 +135,19 @@ VISRTX_DEVICE LightSample sampleSphereLight( // Area PDF = 1 / (4πr²), but we need solid angle PDF // Conversion: pdf_solid_angle = pdf_area * distance² / 
|cos θ| // For sphere: cos θ = dot(surface_normal, -light_direction) - // Surface normal at sampled point: direction from sphere center to sample point + // Surface normal at sampled point: direction from sphere center to sample + // point auto worldSphereCenter = xfmPoint(xfm, ld.sphere.position); auto surfaceNormal = normalize(worldSamplePos - worldSphereCenter); auto cosTheta = dot(surfaceNormal, -ls.dir); if (cosTheta > 0.0f) { // Note: For non-uniform scaling transforms, the area calculation would need - // to account for the transform's effect on surface area (determinant of jacobian) - // Currently assumes uniform scaling or no scaling of the light geometry - float areaPdf = 1.f / (4.f * float(M_PI) * ld.sphere.radius * ld.sphere.radius); + // to account for the transform's effect on surface area (determinant of + // jacobian) Currently assumes uniform scaling or no scaling of the light + // geometry + float areaPdf = + 1.f / (4.f * float(M_PI) * ld.sphere.radius * ld.sphere.radius); ls.pdf = areaPdf * pow2(ls.dist) / cosTheta; } else { // Back-facing surface element contributes no light @@ -179,9 +182,9 @@ VISRTX_DEVICE LightSample sampleRectLight( // Handle front/back face emission based on light configuration if (ld.rect.side.back) { if (ld.rect.side.front) - cosTheta = fabsf(cosTheta); // Both sides: always positive + cosTheta = fabsf(cosTheta); // Both sides: always positive else - cosTheta = -cosTheta; // Back only: flip to back face + cosTheta = -cosTheta; // Back only: flip to back face } // Front only: use cosTheta as-is (positive for front face) @@ -216,7 +219,8 @@ VISRTX_DEVICE LightSample sampleRingLight( // For uniform area sampling: r² = u₂(R² - r²) + r² where R=outer, r=inner auto outerRadius = ld.ring.radius; auto innerRadius = ld.ring.innerRadius; - auto r = sqrtf(u2 * (outerRadius * outerRadius - innerRadius * innerRadius) + innerRadius * innerRadius); + auto r = sqrtf(u2 * (outerRadius * outerRadius - innerRadius * innerRadius) + + 
innerRadius * innerRadius); // Create orthonormal basis with ring direction as normal auto direction = normalize(ld.ring.direction); @@ -246,7 +250,8 @@ VISRTX_DEVICE LightSample sampleRingLight( } else { // Falloff region: smooth interpolation using smoothstep function // smoothstep(t) = 3t² - 2t³ provides C¹ continuity - spot = (cosTheta - ld.ring.cosOuterAngle) / (ld.ring.cosInnerAngle - ld.ring.cosOuterAngle); + spot = (cosTheta - ld.ring.cosOuterAngle) + / (ld.ring.cosInnerAngle - ld.ring.cosOuterAngle); spot = spot * spot * (3.0f - 2.0f * spot); } @@ -258,7 +263,7 @@ VISRTX_DEVICE LightSample sampleRingLight( // Convert area PDF to solid angle PDF for proper Monte Carlo integration // Ring area = π(R² - r²), so area PDF = 1 / ring_area // Solid angle PDF = area_pdf * distance² / |cos θ| - float areaPdf = ld.ring.oneOverArea; // This is 1 / ring_area + float areaPdf = ld.ring.oneOverArea; // This is 1 / ring_area ls.pdf = areaPdf * pow2(ls.dist) / cosTheta; } else { ls.radiance = vec3(0.0f); @@ -290,14 +295,14 @@ VISRTX_DEVICE LightSample sampleSpotLight( // Apply spotlight cone attenuation with smooth falloff if (spot < ld.spot.cosOuterAngle) - spot = 0.f; // Outside cone: no illumination + spot = 0.f; // Outside cone: no illumination else if (spot > ld.spot.cosInnerAngle) - spot = 1.f; // Inside inner cone: full illumination + spot = 1.f; // Inside inner cone: full illumination else { // Falloff region: smooth interpolation using smoothstep spot = (spot - ld.spot.cosOuterAngle) / (ld.spot.cosInnerAngle - ld.spot.cosOuterAngle); - spot = spot * spot * (3.f - 2.f * spot); // smoothstep function + spot = spot * spot * (3.f - 2.f * spot); // smoothstep function } // Apply inverse square law with spotlight attenuation @@ -325,13 +330,15 @@ VISRTX_DEVICE LightSample sampleHDRILight( / glm::vec2(float(M_PI) * 2.0f, float(M_PI)); auto radiance = sampleHDRI(ld, uv); - // Calculate PDF using luminance (ITU-R BT.709 weights) and jacobian - // sin(θ) term accounts for 
the jacobian of spherical→rectangular mapping - auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * sinf(thetaPhi.x) * ld.hdri.pdfWeight; + // pdf_ω = (L/totalL) · pdfWeight; the equirectangular sinθ jacobian is + // already folded into the CDF (computeWeightedLuminance) and into + // pdfWeight's 2π²/(W·H) factor, so do not re-multiply by sinθ here. + auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * ld.hdri.pdfWeight; LightSample ls; ls.dir = xfmVec(xfm, dir); - ls.dist = std::numeric_limits::infinity(); // Environment is at infinity + ls.dist = + std::numeric_limits::infinity(); // Environment is at infinity ls.radiance = radiance * ld.hdri.scale; ls.pdf = pdf; @@ -342,7 +349,8 @@ VISRTX_DEVICE LightSample sampleHDRILight( const LightGPUData &ld, const mat4 &xfm, RandState &rs) { // Importance sampling using hierarchical (marginal/conditional) CDF approach - // First sample row (y) using marginal CDF, then column (x) using conditional CDF + // First sample row (y) using marginal CDF, then column (x) using conditional + // CDF auto y = inverseSampleCDF( ld.hdri.marginalCDF, ld.hdri.size.y, curand_uniform(&rs)); auto x = inverseSampleCDF(ld.hdri.conditionalCDF + y * ld.hdri.size.x, @@ -365,16 +373,17 @@ VISRTX_DEVICE LightSample sampleHDRILight( // uv.y ∈ [0,1] → θ ∈ [0,π], uv.x ∈ [0,1] → φ ∈ [0,2π] auto thetaPhi = float(M_PI) * glm::vec2(uv.y, 2.0f * (uv.x)); - // Calculate PDF using luminance and jacobian of spherical mapping + // pdf_ω = (L/totalL) · pdfWeight; the equirectangular sinθ jacobian is + // already folded into the CDF and pdfWeight, so do not re-multiply here. 
auto radiance = sampleHDRI(ld, uv); - auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * sinf(thetaPhi.x) * ld.hdri.pdfWeight; + auto pdf = dot(radiance, {0.2126f, 0.7152f, 0.0722f}) * ld.hdri.pdfWeight; LightSample ls; // Transform spherical direction to world space // ld.hdri.xfm is orthogonal, so we can use right-hand multiplication // instead of explicitly transposing/inverting the matrix ls.dir = xfmVec(xfm, sphericalCoordsToDirection(thetaPhi) * ld.hdri.xfm); - ls.dist = 1e20f; // Environment is effectively at infinity + ls.dist = 1e20f; // Environment is effectively at infinity ls.radiance = radiance * ld.hdri.scale; ls.pdf = pdf; diff --git a/devices/rtx/device/gpu/shadingState.h b/devices/rtx/device/gpu/shadingState.h index 467056b34..1831d5436 100644 --- a/devices/rtx/device/gpu/shadingState.h +++ b/devices/rtx/device/gpu/shadingState.h @@ -49,11 +49,18 @@ namespace visrtx { -// Describes the next ray to be traced, as a result of the EvaluateNextRay call +enum NextRayFlags : uint32_t +{ + NEXT_RAY_NONE = 0u, + NEXT_RAY_CONTINUES_THROUGH_SURFACE = 1u << 0 +}; + +// Describes the next ray to be traced, as a result of the EvaluateNextRay call. 
struct NextRay { vec3 direction; vec3 contributionWeight; + uint32_t flags{NEXT_RAY_NONE}; }; // Matte @@ -75,6 +82,22 @@ struct PhysicallyBasedShadingState float transmission; float ior; vec3 emission; + + float occlusion; + float specular; + vec3 specularColor; + uint32_t useSpecular; + float clearcoat; + float clearcoatRoughness; + vec3 clearcoatNormal; + float thickness; + float attenuationDistance; + vec3 attenuationColor; + vec3 sheenColor; + float sheenRoughness; + float iridescence; + float iridescenceIor; + float iridescenceThickness; }; #ifdef USE_MDL diff --git a/devices/rtx/device/light/sampling/CDF.cu b/devices/rtx/device/light/sampling/CDF.cu index dddde35a3..3dfcedea2 100644 --- a/devices/rtx/device/light/sampling/CDF.cu +++ b/devices/rtx/device/light/sampling/CDF.cu @@ -129,27 +129,29 @@ void computeConditionalCDFs( } } -void normalizeMarginalCDF(float *marginalCdf, int height) +void normalizeCDF(thrust::device_ptr cdf, int n) { - using thrust::device_pointer_cast; + const float total = cdf[n - 1]; + if (total > 0.0f) { + thrust::transform( + cdf, cdf + n, cdf, [total] __device__(float x) { return x / total; }); + } else { + // Empty distribution; fill with uniform values so sampling doesn't walk off + // the end. 
+ thrust::fill(cdf, cdf + n, 1.0f); + } +} - auto cdf = device_pointer_cast(marginalCdf); - thrust::transform(cdf, - cdf + height, - cdf, - [total = cdf[height - 1]] __device__(float x) { return x / total; }); +void normalizeMarginalCDF(float *marginalCdf, int height) +{ + normalizeCDF(thrust::device_pointer_cast(marginalCdf), height); } void normalizeConditionalCDFs(float *d_conditional_cdf, int width, int height) { - using thrust::device_pointer_cast; - for (int y = 0; y < height; ++y) { - auto cdfRow = device_pointer_cast(d_conditional_cdf + y * width); - thrust::transform( - cdfRow, cdfRow + width, cdfRow, [total = cdfRow[width - 1]] __device__(float x) { - return x / total; - }); + normalizeCDF( + thrust::device_pointer_cast(d_conditional_cdf + y * width), width); } } @@ -172,22 +174,25 @@ float generateCDFTables(const float *luminanceImage, computeRowSums(luminanceImage, rowSums.ptrAs(), width, height); computeMarginalCDF( rowSums.ptrAs(), marginalCdf->ptrAs(), height); - computeConditionalCDFs(luminanceImage, - conditionalCdf->ptrAs(), - width, - height); + computeConditionalCDFs( + luminanceImage, conditionalCdf->ptrAs(), width, height); // Compute pdfWeight // Not the best, but accumulation operations of cdfs accumulate error. // Lets recompute the total luminance from the luminance array // to avoid this. - auto totalLuminance = reduce( - device_pointer_cast(luminanceImage), - device_pointer_cast(luminanceImage) + width * height); - - float angularArea = 4.0f * float(M_PI) / (width * height); - float weight = 1.0f / (totalLuminance * angularArea); + auto totalLuminance = reduce(device_pointer_cast(luminanceImage), + device_pointer_cast(luminanceImage) + width * height); + + // Equirectangular Jacobian |dω/d(u,v)| = 2π²·sinθ; the sinθ weighting is + // already folded into the CDF luminance, so the per-pixel area factor is + // 2π²/(W·H) and pdf_ω = (L/totalL) · (W·H)/(2π²). + // A zero-luminance map produces an inf weight; return 0 instead. 
+ const float equirectJacobian = + 2.0f * float(M_PI) * float(M_PI) / (width * height); + const float weight = + totalLuminance > 0.0f ? 1.0f / (totalLuminance * equirectJacobian) : 0.0f; // Normalize both tables normalizeMarginalCDF(marginalCdf->ptrAs(), height); @@ -211,8 +216,11 @@ float generateCDFTables(const glm::vec3 *rgbImage, computeWeightedLuminance(rgbImage, luminance.ptrAs(), width, height); - return generateCDFTables( - luminance.ptrAs(), width, height, marginalCdf, conditionalCdf); + return generateCDFTables(luminance.ptrAs(), + width, + height, + marginalCdf, + conditionalCdf); } } // namespace visrtx diff --git a/devices/rtx/device/material/PBR.cpp b/devices/rtx/device/material/PBR.cpp index ca938f6f0..eecae93ce 100644 --- a/devices/rtx/device/material/PBR.cpp +++ b/devices/rtx/device/material/PBR.cpp @@ -43,7 +43,18 @@ PBR::PBR(DeviceGlobalState *d) m_roughnessSampler(this), m_normalSampler(this), m_emissiveSampler(this), - m_transmissionSampler(this) + m_occlusionSampler(this), + m_specularSampler(this), + m_specularColorSampler(this), + m_clearcoatSampler(this), + m_clearcoatRoughnessSampler(this), + m_clearcoatNormalSampler(this), + m_transmissionSampler(this), + m_thicknessSampler(this), + m_sheenColorSampler(this), + m_sheenRoughnessSampler(this), + m_iridescenceSampler(this), + m_iridescenceThicknessSampler(this) {} void PBR::commitParameters() @@ -52,7 +63,7 @@ void PBR::commitParameters() m_opacitySampler = getParamObject("opacity"); m_opacityAttribute = getParamString("opacity", ""); - m_color = vec4(vec3(0.8f), 1.f); + m_color = vec4(1.f); getParam("baseColor", ANARI_FLOAT32_VEC4, &m_color); getParam("baseColor", ANARI_FLOAT32_VEC3, &m_color); m_colorSampler = getParamObject("baseColor"); @@ -68,18 +79,70 @@ void PBR::commitParameters() m_normalSampler = getParamObject("normal"); - m_emissive = vec4(0.f, 0.f, 0.f, 0.f); + m_emissive = vec4(0.f); getParam("emissive", ANARI_FLOAT32_VEC4, &m_emissive); getParam("emissive", 
ANARI_FLOAT32_VEC3, &m_emissive); m_emissiveSampler = getParamObject("emissive"); m_emissiveAttribute = getParamString("emissive", ""); + m_occlusionSampler = getParamObject("occlusion"); + + m_useSpecular = getParamDirect("specular").valid() + || getParamDirect("specularColor").valid(); + m_specular = getParam("specular", m_useSpecular ? 1.f : 0.f); + m_specularSampler = getParamObject("specular"); + m_specularAttribute = getParamString("specular", ""); + + m_specularColor = vec3(1.f); + getParam("specularColor", ANARI_FLOAT32_VEC3, &m_specularColor); + m_specularColorSampler = getParamObject("specularColor"); + m_specularColorAttribute = getParamString("specularColor", ""); + + m_clearcoat = getParam("clearcoat", 0.f); + m_clearcoatSampler = getParamObject("clearcoat"); + m_clearcoatAttribute = getParamString("clearcoat", ""); + + m_clearcoatRoughness = getParam("clearcoatRoughness", 0.f); + m_clearcoatRoughnessSampler = getParamObject("clearcoatRoughness"); + m_clearcoatRoughnessAttribute = getParamString("clearcoatRoughness", ""); + + m_clearcoatNormalSampler = getParamObject("clearcoatNormal"); + m_transmission = getParam("transmission", 0.f); m_transmissionSampler = getParamObject("transmission"); m_transmissionAttribute = getParamString("transmission", ""); m_ior = getParam("ior", 1.5f); + m_thickness = getParam("thickness", 0.f); + m_thicknessSampler = getParamObject("thickness"); + m_thicknessAttribute = getParamString("thickness", ""); + + m_attenuationDistance = getParam( + "attenuationDistance", std::numeric_limits::infinity()); + m_attenuationColor = vec3(1.f); + getParam("attenuationColor", ANARI_FLOAT32_VEC3, &m_attenuationColor); + + m_sheenColor = vec3(0.f); + getParam("sheenColor", ANARI_FLOAT32_VEC3, &m_sheenColor); + m_sheenColorSampler = getParamObject("sheenColor"); + m_sheenColorAttribute = getParamString("sheenColor", ""); + + m_sheenRoughness = getParam("sheenRoughness", 0.f); + m_sheenRoughnessSampler = getParamObject("sheenRoughness"); + 
m_sheenRoughnessAttribute = getParamString("sheenRoughness", ""); + + m_iridescence = getParam("iridescence", 0.f); + m_iridescenceSampler = getParamObject("iridescence"); + m_iridescenceAttribute = getParamString("iridescence", ""); + + m_iridescenceIor = getParam("iridescenceIor", 1.3f); + + m_iridescenceThickness = getParam("iridescenceThickness", 0.f); + m_iridescenceThicknessSampler = + getParamObject("iridescenceThickness"); + m_iridescenceThicknessAttribute = getParamString("iridescenceThickness", ""); + m_cutoff = getParam("alphaCutoff", 0.5f); m_mode = alphaModeFromString(getParamString("alphaMode", "opaque")); } @@ -87,39 +150,81 @@ void PBR::commitParameters() MaterialGPUData PBR::gpuData() const { MaterialGPUData retval; + auto &pb = retval.materialData.physicallyBased; retval.callableBaseIndex = static_cast(SbtCallableEntryPoints::PBR); - populateMaterialParameter(retval.materialData.physicallyBased.baseColor, - m_color, - m_colorSampler.get(), - m_colorAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.opacity, - m_opacity, - m_opacitySampler.get(), - m_opacityAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.metallic, - m_metallic, - m_metallicSampler.get(), - m_metallicAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.roughness, + populateMaterialParameter( + pb.baseColor, m_color, m_colorSampler.get(), m_colorAttribute); + populateMaterialParameter( + pb.opacity, m_opacity, m_opacitySampler.get(), m_opacityAttribute); + populateMaterialParameter( + pb.metallic, m_metallic, m_metallicSampler.get(), m_metallicAttribute); + populateMaterialParameter(pb.roughness, m_roughness, m_roughnessSampler.get(), m_roughnessAttribute); - retval.materialData.physicallyBased.normalSampler = + pb.normalSampler = m_normalSampler ? 
m_normalSampler->index() : ~DeviceObjectIndex{0}; - populateMaterialParameter(retval.materialData.physicallyBased.emissive, - m_emissive, - m_emissiveSampler.get(), - m_emissiveAttribute); - populateMaterialParameter(retval.materialData.physicallyBased.transmission, + populateMaterialParameter( + pb.emissive, m_emissive, m_emissiveSampler.get(), m_emissiveAttribute); + populateMaterialParameter(pb.transmission, m_transmission, m_transmissionSampler.get(), m_transmissionAttribute); - retval.materialData.physicallyBased.ior = m_ior; - retval.materialData.physicallyBased.cutoff = m_cutoff; - retval.materialData.physicallyBased.alphaMode = m_mode; + pb.ior = m_ior; + pb.cutoff = m_cutoff; + pb.alphaMode = m_mode; + + pb.occlusionSampler = + m_occlusionSampler ? m_occlusionSampler->index() : ~DeviceObjectIndex{0}; + + populateMaterialParameter( + pb.specular, m_specular, m_specularSampler.get(), m_specularAttribute); + populateMaterialParameter(pb.specularColor, + vec4(m_specularColor, 1.f), + m_specularColorSampler.get(), + m_specularColorAttribute); + pb.useSpecular = m_useSpecular ? 1u : 0u; + + populateMaterialParameter(pb.clearcoat, + m_clearcoat, + m_clearcoatSampler.get(), + m_clearcoatAttribute); + populateMaterialParameter(pb.clearcoatRoughness, + m_clearcoatRoughness, + m_clearcoatRoughnessSampler.get(), + m_clearcoatRoughnessAttribute); + pb.clearcoatNormalSampler = m_clearcoatNormalSampler + ? 
m_clearcoatNormalSampler->index() + : ~DeviceObjectIndex{0}; + + populateMaterialParameter(pb.thickness, + m_thickness, + m_thicknessSampler.get(), + m_thicknessAttribute); + pb.attenuationDistance = m_attenuationDistance; + pb.attenuationColor = m_attenuationColor; + + populateMaterialParameter(pb.sheenColor, + vec4(m_sheenColor, 0.f), + m_sheenColorSampler.get(), + m_sheenColorAttribute); + populateMaterialParameter(pb.sheenRoughness, + m_sheenRoughness, + m_sheenRoughnessSampler.get(), + m_sheenRoughnessAttribute); + + populateMaterialParameter(pb.iridescence, + m_iridescence, + m_iridescenceSampler.get(), + m_iridescenceAttribute); + pb.iridescenceIor = m_iridescenceIor; + populateMaterialParameter(pb.iridescenceThickness, + m_iridescenceThickness, + m_iridescenceThicknessSampler.get(), + m_iridescenceThicknessAttribute); return retval; } diff --git a/devices/rtx/device/material/PBR.h b/devices/rtx/device/material/PBR.h index 71b12a3a5..e6ce2cc80 100644 --- a/devices/rtx/device/material/PBR.h +++ b/devices/rtx/device/material/PBR.h @@ -31,8 +31,10 @@ #pragma once -#include "sampler/Sampler.h" #include "Material.h" +#include "sampler/Sampler.h" + +#include namespace visrtx { @@ -48,7 +50,7 @@ struct PBR : public Material float m_cutoff{0.5f}; AlphaMode m_mode{AlphaMode::OPAQUE}; - vec4 m_color{vec3(0.8f), 1.f}; + vec4 m_color{1.f, 1.f, 1.f, 1.f}; helium::ChangeObserverPtr m_colorSampler; std::string m_colorAttribute; @@ -70,11 +72,57 @@ struct PBR : public Material helium::ChangeObserverPtr m_emissiveSampler; std::string m_emissiveAttribute; + helium::ChangeObserverPtr m_occlusionSampler; + + float m_specular{0.f}; + helium::ChangeObserverPtr m_specularSampler; + std::string m_specularAttribute; + bool m_useSpecular{false}; + + vec3 m_specularColor{1.f}; + helium::ChangeObserverPtr m_specularColorSampler; + std::string m_specularColorAttribute; + + float m_clearcoat{0.f}; + helium::ChangeObserverPtr m_clearcoatSampler; + std::string m_clearcoatAttribute; + + 
float m_clearcoatRoughness{0.f}; + helium::ChangeObserverPtr m_clearcoatRoughnessSampler; + std::string m_clearcoatRoughnessAttribute; + + helium::ChangeObserverPtr m_clearcoatNormalSampler; + float m_transmission{0.f}; helium::ChangeObserverPtr m_transmissionSampler; std::string m_transmissionAttribute; float m_ior{1.5f}; + + float m_thickness{0.f}; + helium::ChangeObserverPtr m_thicknessSampler; + std::string m_thicknessAttribute; + + float m_attenuationDistance{std::numeric_limits::infinity()}; + vec3 m_attenuationColor{1.f}; + + vec3 m_sheenColor{0.f}; + helium::ChangeObserverPtr m_sheenColorSampler; + std::string m_sheenColorAttribute; + + float m_sheenRoughness{0.f}; + helium::ChangeObserverPtr m_sheenRoughnessSampler; + std::string m_sheenRoughnessAttribute; + + float m_iridescence{0.f}; + helium::ChangeObserverPtr m_iridescenceSampler; + std::string m_iridescenceAttribute; + + float m_iridescenceIor{1.3f}; + + float m_iridescenceThickness{0.f}; + helium::ChangeObserverPtr m_iridescenceThicknessSampler; + std::string m_iridescenceThicknessAttribute; }; } // namespace visrtx diff --git a/devices/rtx/device/material/shaders/MDLShader_ptx.cu b/devices/rtx/device/material/shaders/MDLShader_ptx.cu index cef1e7083..795b4f1f2 100644 --- a/devices/rtx/device/material/shaders/MDLShader_ptx.cu +++ b/devices/rtx/device/material/shaders/MDLShader_ptx.cu @@ -204,7 +204,7 @@ NextRay __direct_callable__nextRay( if (curand_uniform(rs) > mdlOpacity(&shadingState->state, &shadingState->resData, shadingState->argBlock)) { - return NextRay{ray->dir, vec3(1.0f)}; + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; } // Sample @@ -227,10 +227,17 @@ NextRay __direct_callable__nextRay( &shadingState->resData, shadingState->argBlock); - return NextRay{vec3(sample_data.k2.x, sample_data.k2.y, sample_data.k2.z), + const vec3 direction(sample_data.k2.x, sample_data.k2.y, sample_data.k2.z); + const vec3 N = normalize(make_vec3(shadingState->state.normal)); + 
const uint32_t flags = dot(ray->dir, N) * dot(direction, N) > 0.0f + ? NEXT_RAY_CONTINUES_THROUGH_SURFACE + : NEXT_RAY_NONE; + + return NextRay{direction, vec3(sample_data.bsdf_over_pdf.x, sample_data.bsdf_over_pdf.y, - sample_data.bsdf_over_pdf.z)}; + sample_data.bsdf_over_pdf.z), + flags}; } // Signature must match the call inside shaderMDLSurface in MDLShader.cuh. @@ -273,12 +280,11 @@ vec3 __direct_callable__evaluateTransmission( const MDLShadingState *shadingState) { return mdlTransmission( - &shadingState->state, &shadingState->resData, shadingState->argBlock) - * 0.85f; + &shadingState->state, &shadingState->resData, shadingState->argBlock); } VISRTX_CALLABLE vec3 __direct_callable__evaluateNormal(const MDLShadingState *shadingState) { return make_vec3(shadingState->state.normal); -} \ No newline at end of file +} diff --git a/devices/rtx/device/material/shaders/MatteShader_ptx.cu b/devices/rtx/device/material/shaders/MatteShader_ptx.cu index ae0d34375..06429e5fa 100644 --- a/devices/rtx/device/material/shaders/MatteShader_ptx.cu +++ b/devices/rtx/device/material/shaders/MatteShader_ptx.cu @@ -53,7 +53,12 @@ VISRTX_CALLABLE void __direct_callable__init(MatteShadingState *shadingState, VISRTX_CALLABLE NextRay __direct_callable__nextRay( const MatteShadingState *shadingState, const Ray *ray, RandState *rs) { - return NextRay{vec3(0.0f, 0.0f, 0.0f), vec3(0.0f, 0.0f, 0.0f)}; + // Before anything, check for opacity. 
If below, then we just pass through + if (curand_uniform(rs) > shadingState->opacity) { + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; + } + + return NextRay{vec3(0.0f), vec3(0.0f)}; } VISRTX_CALLABLE diff --git a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu index 7abbde0a4..5e710b729 100644 --- a/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu +++ b/devices/rtx/device/material/shaders/PhysicallyBasedShader_ptx.cu @@ -38,176 +38,512 @@ using namespace visrtx; +// Clearcoat is fixed to IOR 1.5 per glTF KHR_materials_clearcoat (F0 = 0.04). +constexpr float CLEARCOAT_F0 = 0.04f; + +//----------------------------------------------------------------------------- +// Helpers +//----------------------------------------------------------------------------- + +VISRTX_DEVICE vec3 applyNormalMap( + const vec3 &tangentSpaceNormal, const SurfaceHit &hit, const vec3 &N) +{ + vec3 T = normalize(hit.tU); + vec3 B = normalize(hit.tV); + // Gram-Schmidt to build an orthonormal frame tied to N. 
+ T = normalize(T - dot(T, N) * N); + B = normalize(B - dot(B, N) * N - dot(B, T) * T); + return normalize(T * tangentSpaceNormal.x + B * tangentSpaceNormal.y + + N * tangentSpaceNormal.z); +} + +VISRTX_DEVICE vec3 sampleNormalMap(const FrameGPUData &fd, + DeviceObjectIndex samplerIdx, + const SurfaceHit &hit, + const vec3 &fallback) +{ + if (samplerIdx == ~visrtx::DeviceObjectIndex{0}) + return fallback; + const vec3 ts = normalize(evaluateSampler(fd, samplerIdx, hit) * 2.0f - 1.0f); + return applyNormalMap(ts, hit, hit.Ns); +} + +VISRTX_DEVICE float luminance(const vec3 &c) +{ + return dot(c, vec3(0.2126f, 0.7152f, 0.0722f)); +} + +VISRTX_DEVICE vec3 computeVolumeTransmission( + const PhysicallyBasedShadingState *state) +{ + if (!(state->thickness > 0.0f && state->attenuationDistance > 0.0f + && isfinite(state->attenuationDistance))) + return vec3(1.0f); + + const float k = state->thickness / state->attenuationDistance; + return vec3(powf(fmaxf(state->attenuationColor.x, 1e-6f), k), + powf(fmaxf(state->attenuationColor.y, 1e-6f), k), + powf(fmaxf(state->attenuationColor.z, 1e-6f), k)); +} + +VISRTX_DEVICE vec3 computeTransmissionFilter( + const PhysicallyBasedShadingState *state) +{ + const float transmission = + fmaxf(0.0f, (1.0f - state->metallic) * state->transmission); + return state->baseColor * transmission * computeVolumeTransmission(state); +} + +// Smith Lambda for GGX (common subterm of G1 / G2). 
+VISRTX_DEVICE float smithLambdaGGX(float NdotX, float alpha2) +{ + const float NdotX2 = NdotX * NdotX; + const float safe = fmaxf(NdotX2, 1e-8f); + return 0.5f + * (-1.0f + sqrtf(fmaxf(0.0f, 1.0f + alpha2 * (1.0f - safe) / safe))); +} + +VISRTX_DEVICE float smithG2GGX(float NdotV, float NdotL, float alpha2) +{ + return 1.0f + / (1.0f + smithLambdaGGX(NdotV, alpha2) + smithLambdaGGX(NdotL, alpha2)); +} + +VISRTX_DEVICE float smithG1GGX(float NdotV, float alpha2) +{ + return 1.0f / (1.0f + smithLambdaGGX(NdotV, alpha2)); +} + +VISRTX_DEVICE float ggxD(float NdotH, float alpha2) +{ + const float denom = NdotH * NdotH * (alpha2 - 1.0f) + 1.0f; + return alpha2 / (float(M_PI) * denom * denom); +} + +// Heitz 2018 (https://jcgt.org/published/0007/04/01/) visible-normal sampling +// for GGX. Ve is the view direction in local tangent space (+z = normal). +VISRTX_DEVICE vec3 sampleGGXVNDF( + const vec3 &Ve, float alpha, float u1, float u2) +{ + const vec3 Vh = normalize(vec3(alpha * Ve.x, alpha * Ve.y, Ve.z)); + const float lensq = Vh.x * Vh.x + Vh.y * Vh.y; + const vec3 T1 = lensq > 0.0f ? vec3(-Vh.y, Vh.x, 0.0f) * (1.0f / sqrtf(lensq)) + : vec3(1.0f, 0.0f, 0.0f); + const vec3 T2 = glm::cross(Vh, T1); + const float r = sqrtf(u1); + const float phi = 2.0f * float(M_PI) * u2; + const float t1 = r * cosf(phi); + float t2 = r * sinf(phi); + const float s = 0.5f * (1.0f + Vh.z); + t2 = (1.0f - s) * sqrtf(fmaxf(0.0f, 1.0f - t1 * t1)) + s * t2; + const vec3 Nh = + t1 * T1 + t2 * T2 + sqrtf(fmaxf(0.0f, 1.0f - t1 * t1 - t2 * t2)) * Vh; + return normalize(vec3(alpha * Nh.x, alpha * Nh.y, fmaxf(0.0f, Nh.z))); +} + +// Charlie distribution (Estevez-Kulla 2017) for sheen. 
+VISRTX_DEVICE float charlieD(float NdotH, float alpha) +{ + const float invAlpha = 1.0f / fmaxf(alpha, 1e-4f); + const float sin2 = fmaxf(0.0f, 1.0f - NdotH * NdotH); + return (2.0f + invAlpha) * powf(sin2, 0.5f * invAlpha) / (2.0f * float(M_PI)); +} + +// Ashikhmin visibility term (Neubelt-Pettineo variant) used with Charlie D. +VISRTX_DEVICE float charlieV(float NdotV, float NdotL) +{ + return 1.0f / (4.0f * (NdotV + NdotL - NdotV * NdotL) + 1e-6f); +} + +// glTF KHR_materials_iridescence thin-film Fresnel (port of the reference +// implementation at github.com/KhronosGroup/glTF-Sample-Renderer). Returns a +// per-channel Fresnel reflectance for a thin film of thickness T sitting on a +// base with Schlick F0. See the spec's Appendix B for the math. +VISRTX_DEVICE vec3 fresnel0ToIor(vec3 F0) +{ + const vec3 s = sqrt(glm::clamp(F0, vec3(0.0f), vec3(0.9999f))); + return (vec3(1.0f) + s) / (vec3(1.0f) - s); +} + +VISRTX_DEVICE vec3 iorToFresnel0(vec3 transmittedIor, float incidentIor) +{ + const vec3 t = (transmittedIor - vec3(incidentIor)) + / (transmittedIor + vec3(incidentIor)); + return t * t; +} + +VISRTX_DEVICE float iorToFresnel0(float transmittedIor, float incidentIor) +{ + const float t = + (transmittedIor - incidentIor) / (transmittedIor + incidentIor); + return t * t; +} + +VISRTX_DEVICE vec3 evalSensitivity(float opd, vec3 shift) +{ + // Approximate spectral sensitivity of the standard observer as three + // Gaussians (Belcour & Barla 2017, simplified) so the result stays in RGB. 
+ const float phase = 2.0f * float(M_PI) * opd * 1e-9f; + const vec3 val = vec3(5.4856e-13f, 4.4201e-13f, 5.2481e-13f); + const vec3 pos = vec3(1.6810e+06f, 1.7953e+06f, 2.2084e+06f); + const vec3 var = vec3(4.3278e+09f, 9.3046e+09f, 6.6121e+09f); + + vec3 xyz = val * sqrt(2.0f * float(M_PI) * var) * cos(pos * phase + shift) + * exp(-var * phase * phase); + xyz.x += 9.7470e-14f * sqrtf(2.0f * float(M_PI) * 4.5282e+09f) + * cosf(2.2399e+06f * phase + shift.x) + * expf(-4.5282e+09f * phase * phase); + xyz /= 1.0685e-7f; + + // sRGB conversion (D65). + return vec3(3.2404542f * xyz.x - 1.5371385f * xyz.y - 0.4985314f * xyz.z, + -0.9692660f * xyz.x + 1.8760108f * xyz.y + 0.0415560f * xyz.z, + 0.0556434f * xyz.x - 0.2040259f * xyz.y + 1.0572252f * xyz.z); +} + +VISRTX_DEVICE vec3 evalIridescence(float outsideIor, + float iridescenceIor, + float cosTheta1, + float thickness, + vec3 baseF0) +{ + // Clamp the thin-film IOR to stay strictly above the outside IOR; this + // avoids division by zero and phase artifacts when the two nearly coincide. + const float iridescenceIorSafe = fmaxf(iridescenceIor, outsideIor + 1e-4f); + + // Snell's law: squared sine and cosine of the refraction angle in the film. + const float sinTheta2Sq = + pow2(outsideIor / iridescenceIorSafe) * (1.0f - cosTheta1 * cosTheta1); + const float cosTheta2Sq = 1.0f - sinTheta2Sq; + if (cosTheta2Sq < 0.0f) + return vec3(1.0f); // Total internal reflection. + const float cosTheta2 = sqrtf(cosTheta2Sq); + + // First interface: Fresnel between outside and thin film. + const float R0_12 = iorToFresnel0(iridescenceIorSafe, outsideIor); + const float R12 = R0_12 + (1.0f - R0_12) * pow5(1.0f - cosTheta1); + const float T121 = 1.0f - R12; + const float phi12 = iridescenceIorSafe < outsideIor ? float(M_PI) : 0.0f; + const float phi21 = float(M_PI) - phi12; + + // Second interface: film to base.
+ const vec3 baseIor = + fresnel0ToIor(glm::clamp(baseF0, vec3(0.f), vec3(0.9999f))); + const vec3 R1 = iorToFresnel0(baseIor, iridescenceIorSafe); + const vec3 R23 = R1 + (vec3(1.0f) - R1) * pow5(1.0f - cosTheta2); + const vec3 phi23 = vec3(baseIor.x < iridescenceIorSafe ? float(M_PI) : 0.0f, + baseIor.y < iridescenceIorSafe ? float(M_PI) : 0.0f, + baseIor.z < iridescenceIorSafe ? float(M_PI) : 0.0f); + + const float opd = 2.0f * iridescenceIorSafe * thickness * cosTheta2; + const vec3 phi = vec3(phi21) + phi23; + + const vec3 R123 = glm::clamp(R12 * R23, vec3(1e-5f), vec3(0.9999f)); + const vec3 r123 = sqrt(R123); + const vec3 Rs = pow2(T121) * R23 / (vec3(1.0f) - R123); + + // DC term. + vec3 C0 = R12 + Rs; + vec3 I = C0; + + // Higher-order terms. + vec3 Cm = Rs - T121; + for (int m = 1; m <= 2; ++m) { + Cm *= r123; + const vec3 Sm = 2.0f * evalSensitivity(float(m) * opd, float(m) * phi); + I += Cm * Sm; + } + + return glm::max(I, vec3(0.0f)); +} + +//----------------------------------------------------------------------------- +// Initialize shading state from material parameters +//----------------------------------------------------------------------------- + VISRTX_CALLABLE void __direct_callable__init( PhysicallyBasedShadingState *shadingState, const FrameGPUData *fd, const SurfaceHit *hit, const MaterialGPUData::PhysicallyBased *md) { - vec4 color = getMaterialParameter(*fd, md->baseColor, *hit); - float opacity = getMaterialParameter(*fd, md->opacity, *hit).x; + const vec4 color = getMaterialParameter(*fd, md->baseColor, *hit); + const float opacity = getMaterialParameter(*fd, md->opacity, *hit).x; shadingState->baseColor = vec3(color); - vec3 normal = hit->Ns; + const vec3 N = sampleNormalMap(*fd, md->normalSampler, *hit, hit->Ns); + shadingState->normal = N; - if (md->normalSampler != ~visrtx::DeviceObjectIndex{0}) { - // Normal mapping computation. 
- auto normalMapValue = - normalize(evaluateSampler(*fd, md->normalSampler, *hit) * 2.0f - 1.0f); - vec3 T = normalize(hit->tU); - vec3 B = normalize(hit->tV); - - // Ensure orthogonality (Gram-Schmidt process) - T = normalize(T - dot(T, normal) * normal); - B = normalize(B - dot(B, normal) * normal - dot(B, T) * T); - - // Transform normal from tangent space to world space - normal = normalize(T * normalMapValue.x + B * normalMapValue.y - + normal * normalMapValue.z); - } - - shadingState->normal = normal; shadingState->opacity = adjustedMaterialOpacity(color.w * opacity, md->alphaMode, md->cutoff); shadingState->ior = hit->isFrontFace ? 1.0f / md->ior : md->ior; shadingState->metallic = getMaterialParameter(*fd, md->metallic, *hit).x; shadingState->roughness = getMaterialParameter(*fd, md->roughness, *hit).x; - - // Emission mapping shadingState->emission = vec3(getMaterialParameter(*fd, md->emissive, *hit)); - - // Transmission shadingState->transmission = getMaterialParameter(*fd, md->transmission, *hit).x; + + shadingState->occlusion = + md->occlusionSampler == ~visrtx::DeviceObjectIndex{0} + ? 
1.0f + : evaluateSampler(*fd, md->occlusionSampler, *hit).x; + + shadingState->specular = getMaterialParameter(*fd, md->specular, *hit).x; + shadingState->specularColor = + vec3(getMaterialParameter(*fd, md->specularColor, *hit)); + shadingState->useSpecular = md->useSpecular; + + shadingState->clearcoat = getMaterialParameter(*fd, md->clearcoat, *hit).x; + shadingState->clearcoatRoughness = + getMaterialParameter(*fd, md->clearcoatRoughness, *hit).x; + shadingState->clearcoatNormal = + sampleNormalMap(*fd, md->clearcoatNormalSampler, *hit, hit->Ns); + + shadingState->thickness = getMaterialParameter(*fd, md->thickness, *hit).x; + shadingState->attenuationDistance = md->attenuationDistance; + shadingState->attenuationColor = md->attenuationColor; + + shadingState->sheenColor = + vec3(getMaterialParameter(*fd, md->sheenColor, *hit)); + shadingState->sheenRoughness = + getMaterialParameter(*fd, md->sheenRoughness, *hit).x; + + shadingState->iridescence = + getMaterialParameter(*fd, md->iridescence, *hit).x; + shadingState->iridescenceIor = md->iridescenceIor; + shadingState->iridescenceThickness = + getMaterialParameter(*fd, md->iridescenceThickness, *hit).x; } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateTint( +//----------------------------------------------------------------------------- +// Simple accessors +//----------------------------------------------------------------------------- + +VISRTX_CALLABLE vec3 __direct_callable__evaluateTint( const PhysicallyBasedShadingState *shadingState) { return shadingState->baseColor; } -VISRTX_CALLABLE -float __direct_callable__evaluateOpacity( +VISRTX_CALLABLE float __direct_callable__evaluateOpacity( const PhysicallyBasedShadingState *shadingState) { return shadingState->opacity; } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateEmission( +VISRTX_CALLABLE vec3 __direct_callable__evaluateEmission( const PhysicallyBasedShadingState *shadingState, const vec3 *outgoingDir) { return shadingState->emission; } 
-VISRTX_CALLABLE -vec3 __direct_callable__evaluateTransmission( +VISRTX_CALLABLE vec3 __direct_callable__evaluateTransmission( const PhysicallyBasedShadingState *shadingState) { - return shadingState->baseColor * shadingState->transmission * 0.85f; + return computeTransmissionFilter(shadingState); } -VISRTX_CALLABLE -vec3 __direct_callable__evaluateNormal( +VISRTX_CALLABLE vec3 __direct_callable__evaluateNormal( const PhysicallyBasedShadingState *shadingState) { return shadingState->normal; } -// Signature must match the call inside shaderPhysicallyBasedSurface in -// PhysicallyBasedShader.cuh. +//----------------------------------------------------------------------------- +// NEE shading: base (diffuse + GGX specular) + clearcoat + sheen +//----------------------------------------------------------------------------- + +VISRTX_DEVICE vec3 computeDielectricF0(const PhysicallyBasedShadingState *state) +{ + const float iorF0 = pow2((1.0f - state->ior) / (1.0f + state->ior)); + if (state->useSpecular == 0) + return vec3(iorF0); + return glm::min(vec3(iorF0) * state->specularColor, vec3(1.0f)) + * state->specular; +} + +VISRTX_DEVICE vec3 computeF0(const PhysicallyBasedShadingState *state) +{ + return glm::mix( + computeDielectricF0(state), state->baseColor, state->metallic); +} + +VISRTX_DEVICE vec3 computeF90(const PhysicallyBasedShadingState *state) +{ + const float dielectricF90 = state->useSpecular == 0 ? 
1.0f : state->specular; + return glm::mix(vec3(dielectricF90), vec3(1.0f), state->metallic); +} + +VISRTX_DEVICE vec3 schlickFresnel(vec3 F0, vec3 F90, float VdotH) +{ + return F0 + (F90 - F0) * pow5(1.0f - fabsf(VdotH)); +} + VISRTX_CALLABLE vec3 __direct_callable__shadeSurface( - const PhysicallyBasedShadingState *shadingState, + const PhysicallyBasedShadingState *state, const SurfaceHit *hit, const LightSample *lightSample, const vec3 *outgoingDir) { - const float NdotL = dot(shadingState->normal, lightSample->dir); - if (NdotL <= 0.0f) - return vec3(0.0f, 0.0f, 0.0f); - - const vec3 H = normalize(lightSample->dir + *outgoingDir); - const float NdotH = dot(shadingState->normal, H); + const vec3 N = state->normal; + const vec3 V = *outgoingDir; + const vec3 L = lightSample->dir; - const float NdotV = dot(shadingState->normal, *outgoingDir); - const float VdotH = dot(*outgoingDir, H); - const float LdotH = dot(lightSample->dir, H); + const float NdotL = dot(N, L); + if (NdotL <= 0.0f) + return vec3(0.0f); + + const vec3 H = normalize(L + V); + const float NdotH = fmaxf(dot(N, H), 0.0f); + const float NdotV = fmaxf(dot(N, V), 1e-6f); + const float VdotH = fmaxf(dot(V, H), 0.0f); + + // Base F0 / F90, optionally overridden by iridescence. + vec3 F0 = computeF0(state); + vec3 F90 = computeF90(state); + vec3 F = schlickFresnel(F0, F90, VdotH); + if (state->iridescence > 0.0f && state->iridescenceThickness > 0.0f) { + const vec3 iridescent = evalIridescence( + 1.0f, state->iridescenceIor, VdotH, state->iridescenceThickness, F0); + F = glm::mix(F, iridescent, state->iridescence); + } - // Fresnel - const vec3 f0 = glm::mix( - vec3(pow2((1.f - shadingState->ior) / (1.f + shadingState->ior))), - shadingState->baseColor, - shadingState->metallic); - const vec3 F = f0 + (vec3(1.f) - f0) * pow5(1.f - fabsf(VdotH)); + // Base GGX specular lobe. 
+ const float alpha = fmaxf(pow2(state->roughness), 1e-4f); + const float alpha2 = alpha * alpha; + const float D = ggxD(NdotH, alpha2); + const float G2 = smithG2GGX(NdotV, fmaxf(NdotL, 1e-6f), alpha2); + const vec3 specularBRDF = (F * D * G2) / (4.0f * NdotV * fmaxf(NdotL, 1e-6f)); - // Metallic materials don't reflect diffusely: + // Diffuse lobe (energy-balanced against specular, attenuated by occlusion + // and transmission; metals have no diffuse). const vec3 diffuseColor = - glm::mix(shadingState->baseColor, vec3(0.f), shadingState->metallic); - - const vec3 diffuseBRDF = - (vec3(1.f) - F) * float(M_1_PI) * diffuseColor * fmaxf(0.f, NdotL); - - // Alpha - const float alpha = pow2(shadingState->roughness) * shadingState->opacity; - - // GGX microfacet distribution - const float D = (alpha * alpha * heaviside(NdotH)) - / (float(M_PI) * pow2(NdotH * NdotH * (alpha * alpha - 1.f) + 1.f)); - - // Masking-shadowing term - const float G = - ((2.f * fabsf(NdotL) * heaviside(LdotH)) - / (fabsf(NdotL) - + sqrtf(alpha * alpha + (1.f - alpha * alpha) * NdotL * NdotL))) - * ((2.f * fabsf(NdotV) * heaviside(VdotH)) - / (fabsf(NdotV) - + sqrtf(alpha * alpha + (1.f - alpha * alpha) * NdotV * NdotV))); + glm::mix(state->baseColor, vec3(0.0f), state->metallic); + const vec3 diffuseBRDF = (vec3(1.0f) - F) * float(M_1_PI) * diffuseColor + * state->occlusion * (1.0f - state->transmission); + + vec3 base = diffuseBRDF + specularBRDF; + + // Clearcoat: a second GGX lobe with its own normal and roughness, Fresnel- + // attenuating the base layer at both view and light angles. 
+ if (state->clearcoat > 0.0f) { + const vec3 Nc = state->clearcoatNormal; + const float NcDotV = fmaxf(dot(Nc, V), 1e-6f); + const float NcDotL = fmaxf(dot(Nc, L), 0.0f); + const float NcDotH = fmaxf(dot(Nc, H), 0.0f); + const float FcV = + CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - NcDotV); + const float FcL = + CLEARCOAT_F0 + (1.0f - CLEARCOAT_F0) * pow5(1.0f - NcDotL); + const float alphaC = fmaxf(pow2(state->clearcoatRoughness), 1e-4f); + const float alphaC2 = alphaC * alphaC; + const float Dc = ggxD(NcDotH, alphaC2); + const float Gc = smithG2GGX(NcDotV, fmaxf(NcDotL, 1e-6f), alphaC2); + const float clearcoatLobe = + (FcV * Dc * Gc) / (4.0f * NcDotV * fmaxf(NcDotL, 1e-6f)); + + const float attnV = 1.0f - state->clearcoat * FcV; + const float attnL = 1.0f - state->clearcoat * FcL; + base = base * attnV * attnL; + base += + vec3(state->clearcoat * clearcoatLobe) * NcDotL / fmaxf(NdotL, 1e-6f); + } - const float denom = 4.f * fabsf(NdotV) * fabsf(NdotL); - const vec3 specularBRDF = - denom != 0.f ? (F * D * G) / denom : vec3(0.f); + // Sheen: Charlie distribution + Ashikhmin visibility, added on top of the + // base layer without energy compensation (simple but consistent with the + // glTF reference for basic setups). + if (glm::any(glm::greaterThan(state->sheenColor, vec3(0.0f)))) { + const float alphaS = fmaxf(pow2(state->sheenRoughness), 1e-4f); + const float Ds = charlieD(NdotH, alphaS); + const float Vs = charlieV(NdotV, fmaxf(NdotL, 1e-6f)); + base += state->sheenColor * Ds * Vs; + } - // Transmission is applied only to the diffuse BRDF. This is intentional: - // In this model, transmission reduces the diffuse reflection, while specular - // reflection (surface reflection) is not affected by transmission, as it represents - // light reflected at the surface rather than transmitted through the material. 
- return (diffuseBRDF * (1.0f - shadingState->transmission) + specularBRDF) - * NdotL * lightSample->radiance / lightSample->pdf; + return base * NdotL * lightSample->radiance / lightSample->pdf; } +//----------------------------------------------------------------------------- +// Next-ray importance sampling: stochastic alpha, Fresnel-aware lobe pick, +// GGX VNDF reflection/refraction. Clearcoat/sheen are NEE-only (no separate +// lobe sampling), which matches what the base renderer is set up to consume. +//----------------------------------------------------------------------------- + VISRTX_CALLABLE NextRay __direct_callable__nextRay( - const PhysicallyBasedShadingState *shadingState, - const Ray *ray, - RandState *rs) -{ - // Before anything, check for opacity. If below, then we just pass through - if (curand_uniform(rs) > shadingState->opacity) - { - return NextRay{ray->dir, vec3(1.0f)}; + const PhysicallyBasedShadingState *state, const Ray *ray, RandState *rs) +{ + // Opacity pass-through (stochastic alpha): the ray continues unaltered. + if (curand_uniform(rs) > state->opacity) + return NextRay{ray->dir, vec3(1.0f), NEXT_RAY_CONTINUES_THROUGH_SURFACE}; + + const vec3 N = state->normal; + const vec3 V = -ray->dir; + const mat3 toWorld = computeOrthonormalBasis(N); + const mat3 toLocal = glm::transpose(toWorld); + const vec3 Vlocal = toLocal * V; + if (Vlocal.z <= 0.0f) + return NextRay{N, vec3(0.0f)}; + + const float alpha = fmaxf(pow2(state->roughness), 1e-4f); + const float alpha2 = alpha * alpha; + const vec3 Hlocal = + sampleGGXVNDF(Vlocal, alpha, curand_uniform(rs), curand_uniform(rs)); + + const float NdotV = Vlocal.z; + const float VdotH = fmaxf(dot(Vlocal, Hlocal), 0.0f); + + // Fresnel at the sampled microfacet, with optional iridescence. 
+ const vec3 F0 = computeF0(state); + const vec3 F90 = computeF90(state); + vec3 F = schlickFresnel(F0, F90, VdotH); + if (state->iridescence > 0.0f && state->iridescenceThickness > 0.0f) { + const vec3 iridescent = evalIridescence( + 1.0f, state->iridescenceIor, VdotH, state->iridescenceThickness, F0); + F = glm::mix(F, iridescent, state->iridescence); + } + + const vec3 Lrefl = glm::reflect(-Vlocal, Hlocal); + const float eta = state->ior; // init() pre-inverted for front-facing hits + const vec3 Ltrans = glm::refract(-Vlocal, Hlocal, eta); + const vec3 transmissionFilter = computeTransmissionFilter(state); + const bool hasTransmission = luminance(transmissionFilter) > 0.0f; + const bool totalInternalReflection = + hasTransmission && (glm::length(Ltrans) < 1e-6f || Ltrans.z >= 0.0f); + + vec3 reflectEnergy = totalInternalReflection ? vec3(1.0f) : F; + vec3 transmitEnergy = totalInternalReflection + ? vec3(0.0f) + : glm::max(vec3(1.0f) - F, vec3(0.0f)) * transmissionFilter; + + const float reflectStrength = + fmaxf(luminance(glm::max(reflectEnergy, vec3(0.0f))), 0.0f); + const float transmitStrength = + fmaxf(luminance(glm::max(transmitEnergy, vec3(0.0f))), 0.0f); + const float combinedStrength = reflectStrength + transmitStrength; + if (combinedStrength <= 0.0f) + return NextRay{N, vec3(0.0f)}; + + const float reflectProb = reflectStrength / combinedStrength; + const bool sampleTransmission = curand_uniform(rs) > reflectProb; + + if (sampleTransmission) { + const float NdotL = -Ltrans.z; // L points through the surface. 
+ const float G1 = smithG1GGX(NdotV, alpha2); + const float G2 = smithG2GGX(NdotV, NdotL, alpha2); + const vec3 weight = transmitEnergy * (G2 / fmaxf(G1, 1e-8f)) + / fmaxf(1.0f - reflectProb, 1e-8f); + return NextRay{normalize(toWorld * Ltrans), + weight, + NEXT_RAY_CONTINUES_THROUGH_SURFACE}; } - // Open cone, along the perfect reflection ray, with a metallic and - // roughness-dependent angle - const float roughness = shadingState->roughness; - const float metalness = shadingState->metallic; - const float roughnessSqr = roughness * roughness; - const float cosThetaMax = 1.0f - (roughnessSqr * roughnessSqr); - const float transmission = shadingState->transmission; - - bool isReflected = curand_uniform(rs) > transmission; - auto nextVector = isReflected - ? glm::reflect(ray->dir, shadingState->normal) - : glm::refract(ray->dir, shadingState->normal, shadingState->ior); - - auto nextRay = computeOrthonormalBasis(normalize(nextVector)) - * uniformSampleCone(cosThetaMax, - vec3(curand_uniform(rs), curand_uniform(rs), curand_uniform(rs))); - - auto nextSampleWeight = isReflected - ? shadingState->baseColor * metalness * (1.0f - transmission) - : shadingState->baseColor * transmission; - - return NextRay{nextRay, nextSampleWeight}; -} \ No newline at end of file + // Reflection. 
+ if (Lrefl.z <= 0.0f) + return NextRay{N, vec3(0.0f)}; + + const float NdotL = Lrefl.z; + const float G1 = smithG1GGX(NdotV, alpha2); + const float G2 = smithG2GGX(NdotV, NdotL, alpha2); + const vec3 weight = + reflectEnergy * (G2 / fmaxf(G1, 1e-8f)) / fmaxf(reflectProb, 1e-8f); + return NextRay{normalize(toWorld * Lrefl), weight}; +} diff --git a/devices/rtx/device/renderer/Debug_ptx.cu b/devices/rtx/device/renderer/Debug_ptx.cu index 8c6a1f2af..372e6ae13 100644 --- a/devices/rtx/device/renderer/Debug_ptx.cu +++ b/devices/rtx/device/renderer/Debug_ptx.cu @@ -237,7 +237,10 @@ VISRTX_GLOBAL void __raygen__() auto ray = makePrimaryRay(ss, true /*pixel centered*/); - auto color = vec3(getBackgroundImage(frameData.renderer, ss.screen)); + vec3 color{0.f}; + if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + color = hdri; + } auto depth = ray.t.upper; auto normal = ray.dir; uint32_t primID = ~0u; diff --git a/devices/rtx/device/renderer/Interactive_ptx.cu b/devices/rtx/device/renderer/Interactive_ptx.cu index 9678c7c8d..c75d0a0ad 100644 --- a/devices/rtx/device/renderer/Interactive_ptx.cu +++ b/devices/rtx/device/renderer/Interactive_ptx.cu @@ -33,6 +33,7 @@ #include "gpu/evalShading.h" #include "gpu/gpu_math.h" #include "gpu/gpu_objects.h" +#include "gpu/gpu_util.h" #include "gpu/intersectRay.h" #include "gpu/renderer/common.h" #include "gpu/renderer/raygen_helpers.h" @@ -124,13 +125,10 @@ struct InteractiveShadingPolicy NextRay nextRay = materialNextRay(shadingState, ray, ss.rs); if (glm::any(glm::greaterThan( nextRay.contributionWeight, glm::vec3(MIN_CONTRIBUTION_EPSILON)))) { + const float side = continuesThroughSurface(nextRay) ? 
-1.0f : 1.0f; Ray bounceRay = { - bounceHit.hitpoint - + bounceHit.Ng - * std::copysignf( - bounceHit.epsilon, dot(bounceHit.Ns, nextRay.direction)), - normalize(nextRay.direction), - }; + bounceHit.hitpoint + bounceHit.Ng * bounceHit.epsilon * side, + normalize(nextRay.direction)}; // Only check for intersecting surfaces and background as secondary light // interactions @@ -150,14 +148,13 @@ struct InteractiveShadingPolicy * rendererParams.ambientColor * rendererParams.ambientIntensity; contrib += color * nextRay.contributionWeight; } else { - if (vec3 hdri; getBackgroundLight(frameData, bounceRay.dir, hdri)) { - contrib += vec3(hdri) * nextRay.contributionWeight; - } + vec3 hdri; + if (getBackgroundLight(frameData, bounceRay.dir, hdri)) + contrib += hdri * nextRay.contributionWeight; } } - float opacity = evaluateOpacity(shadingState); - return vec4(contrib, opacity); + return vec4(contrib, evaluateOpacity(shadingState)); } }; diff --git a/devices/rtx/device/renderer/Quality_ptx.cu b/devices/rtx/device/renderer/Quality_ptx.cu index e1a2b36f6..fe319255c 100644 --- a/devices/rtx/device/renderer/Quality_ptx.cu +++ b/devices/rtx/device/renderer/Quality_ptx.cu @@ -77,15 +77,13 @@ struct SampleDetails VISRTX_DEVICE void accumPixelSample(const FrameGPUData &frame, const uvec2 &pixel, - const SampleDetails &sample, - const int frameIDOffset = 0) + const SampleDetails &sample) { accumPixelSample(frame, pixel, vec4(sample.color, sample.opacity), sample.albedo, - sample.normal, - frameIDOffset); + sample.normal); } VISRTX_DEVICE vec3 surfaceAttenuation(ScreenSample &ss, Ray r) @@ -143,27 +141,29 @@ VISRTX_DEVICE LightSample sampleLights(ScreenSample &ss, // curand_uniform returns (0,1], invert to get [0,numLights). // Clamp to handle float rounding when curand returns a subnormal. 
- const size_t selectedIdx = glm::min( - size_t((1.0f - curand_uniform(&ss.rs)) * float(numLights)), - numLights - 1); + const size_t selectedIdx = + glm::min(size_t((1.0f - curand_uniform(&ss.rs)) * float(numLights)), + numLights - 1); - const float radianceWeight = float(numLights); + // Uniform light pick: P(light) = 1/numLights. Fold that into the returned + // pdf rather than into radiance so MIS weights see the full joint pdf + // P(dir, light) = P(dir | light) * (1/numLights). + const float lightPickPdf = 1.0f / float(numLights); // last index is reserved for ambient light if it exists if (selectedIdx == world.numLightInstances) { const auto &rendererParams = frameData.renderer; return LightSample{ - radianceWeight * rendererParams.ambientColor - * rendererParams.ambientIntensity, + rendererParams.ambientColor * rendererParams.ambientIntensity, sampleHemisphere(ss.rs, normal), std::numeric_limits::max(), - 1.0f / (2.0f * float(M_PI)), + lightPickPdf / (2.0f * float(M_PI)), }; } else { const auto &lightInstance = world.lightInstances[selectedIdx]; auto ls = sampleLight(ss, origin, lightInstance.lightIndex, lightInstance.xfm); - ls.radiance *= radianceWeight; + ls.pdf *= lightPickPdf; return ls; } } @@ -271,7 +271,6 @@ VISRTX_GLOBAL void __raygen__() vec3(0.0f), 0.0f, vec3(0.0f), ray.t.upper, vec3(0.0f)}; auto sampleContribution = vec3(1.0f); - bool firstHitAssigned = false; for (int d = 0; d < qualityParams.maxRayDepth; ++d) { const bool isFirstBounce = d == 0; @@ -317,7 +316,7 @@ VISRTX_GLOBAL void __raygen__() if (shouldTerminatePath(ss, d, sampleContribution, true)) break; - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, volumeSample.depth, @@ -330,7 +329,6 @@ VISRTX_GLOBAL void __raygen__() ? 
volumeSample.normal : -ray.dir; sample.normal = volumeNormal; - firstHitAssigned = true; } const vec3 scatterDir = randomDir(ss.rs); @@ -348,7 +346,7 @@ VISRTX_GLOBAL void __raygen__() const vec3 materialTint = materialEvaluateTint(shadingState); const float materialOpacity = materialEvaluateOpacity(shadingState); - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, surfaceHit.t, @@ -358,7 +356,6 @@ VISRTX_GLOBAL void __raygen__() sample.depth = surfaceHit.t; sample.normal = materialEvaluateNormal(shadingState); sample.albedo = materialTint; - firstHitAssigned = true; } sample.color += sampleContribution * materialEmission * materialOpacity; @@ -382,29 +379,28 @@ VISRTX_GLOBAL void __raygen__() } } - accumulateValue(sample.opacity, materialOpacity, sample.opacity); - auto nextRay = materialNextRay(shadingState, ray, ss.rs); sampleContribution *= nextRay.contributionWeight; + if (!continuesThroughSurface(nextRay)) + accumulateValue(sample.opacity, 1.0f, sample.opacity); + if (shouldTerminatePath(ss, d, sampleContribution, true)) break; - ray = Ray{ - surfaceHit.hitpoint - + surfaceHit.Ng - * std::copysignf(surfaceHit.epsilon, - dot(surfaceHit.Ns, nextRay.direction)), - normalize(vec3(nextRay.direction)), - }; + const float side = continuesThroughSurface(nextRay) ? 
-1.0f : 1.0f; + ray = + Ray{surfaceHit.hitpoint + surfaceHit.Ng * surfaceHit.epsilon * side, + normalize(vec3(nextRay.direction))}; } - if (!surfaceHit.foundHit) { - const auto bg = getBackground(frameData, ss.screen, ray.dir); - sample.color += sampleContribution * vec3(bg) * bg.a; - accumulateValue(sample.opacity, bg.a, sample.opacity); + if (!surfaceHit.foundHit && !volumeSample.didScatter) { + if (vec3 hdri; getBackgroundLight(frameData, ray.dir, hdri)) { + sample.color += sampleContribution * hdri; + accumulateValue(sample.opacity, 1.f, sample.opacity); + } - if (isFirstBounce && !firstHitAssigned) { + if (isFirstBounce) { setPixelIds(frameData.fb, ss.pixel, ray.t.upper, ~0u, ~0u, ~0u); } @@ -412,7 +408,7 @@ VISRTX_GLOBAL void __raygen__() } } - accumPixelSample(frameData, ss.pixel, sample, i); + accumPixelSample(frameData, ss.pixel, sample); } } diff --git a/devices/rtx/device/renderer/Renderer.cpp b/devices/rtx/device/renderer/Renderer.cpp index 7b87c161e..6e7bba2cb 100644 --- a/devices/rtx/device/renderer/Renderer.cpp +++ b/devices/rtx/device/renderer/Renderer.cpp @@ -170,7 +170,8 @@ void Renderer::commitParameters() m_cullTriangleBF = getParam("cullTriangleBackfaces", false); m_volumeSamplingRate = std::clamp(getParam("volumeSamplingRate", 0.125f), 1e-3f, 10.f); - m_premultiplyBackground = getParam("premultiplyBackground", false); + m_premultipliedAlpha = getParam( + "premultipliedAlpha", getParam("premultiplyBackground", false)); m_cutPlane = getParam("cutPlane", vec4(0.f)); if (m_checkerboard) m_spp = 1; @@ -211,7 +212,7 @@ void Renderer::populateFrameData(FrameGPUData &fd) const fd.renderer.fireflyFilter = m_fireflyFilter; fd.renderer.inverseVolumeSamplingRate = 1.f / m_volumeSamplingRate; fd.renderer.numIterations = std::max(m_spp, 1); - fd.renderer.premultiplyBackground = m_premultiplyBackground; + fd.renderer.premultipliedAlpha = m_premultipliedAlpha; fd.renderer.cutPlane = m_cutPlane; } diff --git a/devices/rtx/device/renderer/Renderer.h 
b/devices/rtx/device/renderer/Renderer.h index 07ea51883..219c2ab47 100644 --- a/devices/rtx/device/renderer/Renderer.h +++ b/devices/rtx/device/renderer/Renderer.h @@ -94,7 +94,7 @@ struct Renderer : public Object true}; // enable internal tonemapping during sample accumulation int m_sampleLimit{0}; bool m_cullTriangleBF{false}; - bool m_premultiplyBackground{false}; + bool m_premultipliedAlpha{false}; float m_volumeSamplingRate{1.f}; vec4 m_cutPlane{0.f}; diff --git a/devices/rtx/device/visrtx_device.json b/devices/rtx/device/visrtx_device.json index f7fcfe613..3c26c1287 100644 --- a/devices/rtx/device/visrtx_device.json +++ b/devices/rtx/device/visrtx_device.json @@ -105,13 +105,13 @@ "description": "suppress fireflies via reversible tonemapping before accumulation" }, { - "name": "premultiplyBackground", + "name": "premultipliedAlpha", "types": [ "ANARI_BOOL" ], "tags": [], "default": false, - "description": "pre-multiply alpha channel with background color" + "description": "pre-multiply RGB by alpha in the composited output pixel" }, { "name": "cullTriangleBackfaces", @@ -266,13 +266,13 @@ "description": "suppress fireflies via reversible tonemapping before accumulation" }, { - "name": "premultiplyBackground", + "name": "premultipliedAlpha", "types": [ "ANARI_BOOL" ], "tags": [], "default": false, - "description": "pre-multiply alpha channel with background color" + "description": "pre-multiply RGB by alpha in the composited output pixel" }, { "name": "cullTriangleBackfaces", @@ -406,6 +406,15 @@ "default": true, "description": "suppress fireflies via reversible tonemapping before accumulation" }, + { + "name": "premultipliedAlpha", + "types": [ + "ANARI_BOOL" + ], + "tags": [], + "default": false, + "description": "pre-multiply RGB by alpha in the composited output pixel" + }, { "name": "cullTriangleBackfaces", "types": [