From e2aa2bbcd549aae7782b012be9e89569a58956f8 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 01:05:48 +0100 Subject: [PATCH 01/54] add creation params and geometry creator reference to IAssetLoaderOverride, also encapsulate better --- include/nbl/asset/IAssetManager.h | 2 +- include/nbl/asset/interchange/IAssetLoader.h | 31 ++++++++++++++++++-- src/nbl/asset/interchange/IAssetLoader.cpp | 8 ++--- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/IAssetManager.h b/include/nbl/asset/IAssetManager.h index d9995526bc..45b32b7c61 100644 --- a/include/nbl/asset/IAssetManager.h +++ b/include/nbl/asset/IAssetManager.h @@ -122,7 +122,7 @@ class NBL_API2 IAssetManager : public core::IReferenceCounted explicit IAssetManager(core::smart_refctd_ptr&& system, core::smart_refctd_ptr&& compilerSet = nullptr) : m_system(std::move(system)), m_compilerSet(std::move(compilerSet)), - m_defaultLoaderOverride(this) + m_defaultLoaderOverride({.manager=this}) { assert(IPreHashed::INVALID_HASH == static_cast(core::blake3_hasher{})); initializeMeshTools(); diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index a194f0e13e..14c84f400b 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -11,6 +11,7 @@ #include "nbl/system/ILogger.h" #include "nbl/asset/interchange/SAssetBundle.h" +#include "nbl/asset/utils/CGeometryCreator.h" namespace nbl::asset @@ -171,14 +172,35 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted protected: constexpr static inline bool ASSET_MUTABILITY_ON_CACHE_INSERT = true; - IAssetManager* m_manager; system::ISystem* m_system; public: - NBL_API2 IAssetLoaderOverride(IAssetManager* _manager); + struct SCreationParams + { + IAssetManager* manager = nullptr; + core::smart_refctd_ptr geoCreator = nullptr; + //core::smart_refctd_ptr polyGeoManip = nullptr; + }; + NBL_API2 
IAssetLoaderOverride(SCreationParams&& params); + + // + inline IAssetManager* getManager() const {return m_creationParams.manager;} // - inline IAssetManager* getManager() const {return m_manager;} + inline CGeometryCreator* getGeometryCreator() + { + if (!m_creationParams.geoCreator) + m_creationParams.geoCreator = core::make_smart_refctd_ptr(); + return m_creationParams.geoCreator.get(); + } + + /* + inline CPolygonGeometryManipulator* getPolygonGeometryManipulator() + { + if (!m_creationParams.geoCreator) + m_creationParams.geoCreator = core::make_smart_refctd_ptr(); + return m_creationParams.polyGeoManip.get(); + }*/ //! template @@ -274,6 +296,9 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted //! After a successful load of an asset or sub-asset //TODO change name virtual void insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel); + + private: + SCreationParams m_creationParams; }; public: diff --git a/src/nbl/asset/interchange/IAssetLoader.cpp b/src/nbl/asset/interchange/IAssetLoader.cpp index 9a881b300b..4a9a8f0378 100644 --- a/src/nbl/asset/interchange/IAssetLoader.cpp +++ b/src/nbl/asset/interchange/IAssetLoader.cpp @@ -10,7 +10,7 @@ using namespace nbl::core; using namespace nbl::asset; // todo NEED DOCS -IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(IAssetManager* _manager) : m_manager(_manager), m_system(m_manager->getSystem()) +IAssetLoader::IAssetLoaderOverride::IAssetLoaderOverride(SCreationParams&& params) : m_creationParams(std::move(params)) { } @@ -20,7 +20,7 @@ SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::stri if ((levelFlag & ECF_DUPLICATE_TOP_LEVEL) == ECF_DUPLICATE_TOP_LEVEL) return {}; - auto found = m_manager->findAssets(inSearchKey, inAssetTypes); + auto found = getManager()->findAssets(inSearchKey, inAssetTypes); if (!found->size()) return handleSearchFail(inSearchKey, ctx, hierarchyLevel); 
return chooseRelevantFromFound(found->begin(), found->end(), ctx, hierarchyLevel); @@ -28,11 +28,11 @@ SAssetBundle IAssetLoader::IAssetLoaderOverride::findCachedAsset(const std::stri void IAssetLoader::IAssetLoaderOverride::insertAssetIntoCache(SAssetBundle& asset, const std::string& supposedKey, const SAssetLoadParams& _params, const uint32_t hierarchyLevel) { - m_manager->changeAssetKey(asset, supposedKey); + getManager()->changeAssetKey(asset, supposedKey); auto levelFlag = _params.cacheFlags >> (uint64_t(hierarchyLevel) * 2ull); if (!(levelFlag&ECF_DONT_CACHE_TOP_LEVEL)) - m_manager->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); + getManager()->insertAssetIntoCache(asset,ASSET_MUTABILITY_ON_CACHE_INSERT); } SAssetBundle IAssetLoader::interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override) From ef14edf20a196e22c3e3e058ee50f9f639515ac7 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 01:10:41 +0100 Subject: [PATCH 02/54] Add a primitive reversal mode to `IIndexingCallback::SContext` Make hash recompute disable-able in smooth normal calculation fix some bugs like bad ListIndexingCallbacks and missing return values --- include/nbl/asset/IGeometryCollection.h | 3 +- include/nbl/asset/IPolygonGeometry.h | 79 +++++++++++-------- src/nbl/asset/ICPUPolygonGeometry.cpp | 8 +- .../asset/utils/CSmoothNormalGenerator.cpp | 30 ++++--- src/nbl/asset/utils/CSmoothNormalGenerator.h | 25 +++--- 5 files changed, 85 insertions(+), 60 deletions(-) diff --git a/include/nbl/asset/IGeometryCollection.h b/include/nbl/asset/IGeometryCollection.h index 1fac15ec17..3d62ad53cf 100644 --- a/include/nbl/asset/IGeometryCollection.h +++ b/include/nbl/asset/IGeometryCollection.h @@ -34,8 +34,7 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted if 
(jointRedirectView.getElementCount()getJointCount()) return false; } - else - return true; + return true; } inline bool hasTransform() const {return !core::isnan(transform[0][0]);} diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index d23d503de4..023c9e462a 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h @@ -10,6 +10,7 @@ #include "nbl/asset/IAccelerationStructure.h" #include +#include namespace nbl::asset { @@ -39,41 +40,53 @@ class IPolygonGeometryBase : public virtual core::IReferenceCounted template requires (sizeof(OutT)<8 && hlsl::concepts::UnsignedIntegralScalar) struct SContext final { - // `indexOfIndex` is somewhat of a baseIndex - template - inline void streamOut(const uint32_t indexOfIndex, const Range& permutation) - { - auto& typedOut = reinterpret_cast(out); - if (indexBuffer) - switch (indexSize) + private: + // `indexOfIndex` is somewhat of a baseIndex + template + inline void impl_streamOut(const uint32_t indexOfIndex, const Range permutation) { - case 1: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - case 2: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - case 4: - for (const auto relIx : permutation) - *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; - break; - default: - assert(false); - break; + auto& typedOut = reinterpret_cast(out); + if (indexBuffer) + switch (indexSize) + { + case 1: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + case 2: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + case 4: + for (const auto relIx : permutation) + *(typedOut++) = reinterpret_cast(indexBuffer)[indexOfIndex+relIx]; + break; + default: + assert(false); + break; + } + else + 
for (const auto relIx : permutation) + *(typedOut++) = indexOfIndex+relIx; } - else - for (const auto relIx : permutation) - *(typedOut++) = indexOfIndex+relIx; - } - - // always the base pointer, doesn't get advanced - const void* const indexBuffer; - const uint64_t indexSize : 3; - const uint64_t beginPrimitive : 30; - const uint64_t endPrimitive : 31; - void* out; + + public: + template requires std::ranges::bidirectional_range + inline void streamOut(const uint32_t indexOfIndex, const V& permutation) + { + if (reversePrims) + impl_streamOut(indexOfIndex,permutation|std::views::reverse); + else + impl_streamOut(indexOfIndex,permutation); + } + + // always the base pointer, doesn't get advanced + const void* const indexBuffer; + const uint64_t indexSize : 3; + const uint64_t beginPrimitive : 30; + const uint64_t endPrimitive : 30; + const uint64_t reversePrims : 1 = false; + void* out; }; // could have been a static if not virtual virtual void operator()(SContext& ctx) const = 0; diff --git a/src/nbl/asset/ICPUPolygonGeometry.cpp b/src/nbl/asset/ICPUPolygonGeometry.cpp index 1acff42876..1af3c8c5d6 100644 --- a/src/nbl/asset/ICPUPolygonGeometry.cpp +++ b/src/nbl/asset/ICPUPolygonGeometry.cpp @@ -12,8 +12,8 @@ class CListIndexingCB final : public IPolygonGeometryBase::IIndexingCallback template static void operator_impl(SContext& ctx) { - auto indexOfIndex = ctx.beginPrimitive*3; - for (const auto end=ctx.endPrimitive*3; indexOfIndex!=end; indexOfIndex+=3) + auto indexOfIndex = ctx.beginPrimitive*Order; + for (const auto end=ctx.endPrimitive*Order; indexOfIndex!=end; indexOfIndex+=Order) ctx.streamOut(indexOfIndex,std::ranges::iota_view{0,int(Order)}); } @@ -70,7 +70,7 @@ class CTriangleStripIndexingCB final : public IPolygonGeometryBase::IIndexingCal indexOfIndex = ctx.beginPrimitive+2; const int32_t perm[] = {-1,-2,0}; for (const auto end=ctx.endPrimitive+2; indexOfIndex!=end; indexOfIndex++) - ctx.streamOut(indexOfIndex,perm); + 
ctx.streamOut>(indexOfIndex,perm); } public: @@ -106,7 +106,7 @@ class CTriangleFanIndexingCB final : public IPolygonGeometryBase::IIndexingCallb { // first index is always global 0 perm[0] = -indexOfIndex; - ctx.streamOut(indexOfIndex,perm); + ctx.streamOut>(indexOfIndex,perm); } } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp index f8bc45a317..a1884bd191 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.cpp +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.cpp @@ -29,13 +29,18 @@ static bool compareVertexPosition(const hlsl::float32_t3& a, const hlsl::float32 return (difference.x <= epsilon && difference.y <= epsilon && difference.z <= epsilon); } -CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp) +CSmoothNormalGenerator::Result CSmoothNormalGenerator::calculateNormals(const asset::ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction vxcmp, const bool recomputeHash) { assert(polygon->getIndexingCallback()->degree() == 3); + static constexpr auto MinEpsilon = 0.00001f; const auto patchedEpsilon = epsilon < MinEpsilon ? 
MinEpsilon : epsilon; VertexHashMap vertexHashMap = setupData(polygon, patchedEpsilon); + const auto smoothPolygon = processConnectedVertices(polygon, vertexHashMap, patchedEpsilon,vxcmp); + if (recomputeHash) + CPolygonGeometryManipulator::recomputeContentHashes(smoothPolygon.get()); + return { vertexHashMap, smoothPolygon }; } @@ -51,18 +56,25 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as { //calculate face normal of parent triangle hlsl::float32_t3 v0, v1, v2; + // TODO: could iterate over an index buffer properly polygon->getPositionView().decodeElement(i, v0); polygon->getPositionView().decodeElement(i + 1, v1); polygon->getPositionView().decodeElement(i + 2, v2); - const auto faceNormal = normalize(cross(v1 - v0, v2 - v0)); + auto faceNormal = cross(v1 - v0, v2 - v0); + // if any triangle edge is 0 length, the cross product will be 0 length too + const float normLen2 = dot(faceNormal,faceNormal); + // need to filter invalid triangles while we're at it + if (normLen2::min) + continue; + faceNormal *= hlsl::rsqrt(normLen2); //set data for m_vertices - const auto angleWages = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); + const auto angleWeights = hlsl::shapes::util::anglesFromTriangleEdges(v2 - v1, v0 - v2, v1 - v2); - vertices.add({ i, 0, faceNormal * angleWages.x, v0}); - vertices.add({ i + 1, 0, faceNormal * angleWages.y,v1}); - vertices.add({ i + 2, 0, faceNormal * angleWages.z, v2}); + vertices.add({ i, 0, faceNormal * angleWeights.x, v0}); + vertices.add({ i + 1, 0, faceNormal * angleWeights.y,v1}); + vertices.add({ i + 2, 0, faceNormal * angleWeights.z, v2}); } vertices.bake(); @@ -70,14 +82,16 @@ CSmoothNormalGenerator::VertexHashMap CSmoothNormalGenerator::setupData(const as return vertices; } -core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, float epsilon, VxCmpFunction vxcmp) 
+core::smart_refctd_ptr CSmoothNormalGenerator::processConnectedVertices(const asset::ICPUPolygonGeometry* polygon, VertexHashMap& vertexHashMap, const float epsilon, VxCmpFunction vxcmp) { + // TODO: its semi doable to defer unwelding/rewelding until later an just work on a duplicated normal buffer only auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); static constexpr auto NormalFormat = EF_R32G32B32_SFLOAT; const auto normalFormatBytesize = asset::getTexelOrBlockBytesize(NormalFormat); auto normalBuf = ICPUBuffer::create({ normalFormatBytesize * outPolygon->getPositionView().getElementCount()}); auto normalView = polygon->getNormalView(); + // TODO: compute actual range hlsl::shapes::AABB<4,hlsl::float32_t> aabb; aabb.maxVx = hlsl::float32_t4(1, 1, 1, 0.f); aabb.minVx = -aabb.maxVx; @@ -118,8 +132,6 @@ core::smart_refctd_ptr CSmoothNormalGenerator::processConne memcpy(normalPtr + (normalStride * processedVertex.index), &normal, sizeof(normal)); } - CPolygonGeometryManipulator::recomputeContentHashes(outPolygon.get()); - return outPolygon; } diff --git a/src/nbl/asset/utils/CSmoothNormalGenerator.h b/src/nbl/asset/utils/CSmoothNormalGenerator.h index 6ac4daf6c4..7c9bf5358f 100644 --- a/src/nbl/asset/utils/CSmoothNormalGenerator.h +++ b/src/nbl/asset/utils/CSmoothNormalGenerator.h @@ -11,18 +11,20 @@ namespace nbl::asset { // TODO: implement a class template that take position type(either float32_t3 or float64_t3 as template argument -class CSmoothNormalGenerator +class CSmoothNormalGenerator final { public: CSmoothNormalGenerator() = delete; ~CSmoothNormalGenerator() = delete; - struct VertexData - { - uint32_t index; //offset of the vertex into index buffer + struct VertexData + { + //offset of the vertex into index buffer + uint32_t index; uint32_t hash; - hlsl::float32_t3 weightedNormal; - hlsl::float32_t3 position; //position of the vertex in 3D space + hlsl::float32_t3 weightedNormal; + //position of the vertex in 3D space + hlsl::float32_t3 
position; hlsl::float32_t3 getPosition() const { @@ -39,21 +41,20 @@ class CSmoothNormalGenerator return hash; }; - }; + }; using VxCmpFunction = std::function; using VertexHashMap = CVertexHashGrid; - struct Result - { + struct Result + { VertexHashMap vertexHashGrid; core::smart_refctd_ptr geom; - }; - static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function); + }; + static Result calculateNormals(const ICPUPolygonGeometry* polygon, float epsilon, VxCmpFunction function, const bool recomputeHash=true); private: - static VertexHashMap setupData(const ICPUPolygonGeometry* polygon, float epsilon); static core::smart_refctd_ptr processConnectedVertices(const ICPUPolygonGeometry* polygon, VertexHashMap& vertices, float epsilon, VxCmpFunction vxcmp); }; From 0136b636767fd1f1db903c140c64583aa584ea8c Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 01:14:33 +0100 Subject: [PATCH 03/54] make hash recompute optional on all mesh manipulation functions add primitive reversal modes to smooth unwelding and conversion to list indexing methods, this way we have --- .../asset/utils/CPolygonGeometryManipulator.h | 156 +---- include/nbl/asset/utils/CVertexWelder.h | 659 +++++++++--------- .../utils/CPolygonGeometryManipulator.cpp | 557 ++++----------- 3 files changed, 502 insertions(+), 870 deletions(-) diff --git a/include/nbl/asset/utils/CPolygonGeometryManipulator.h b/include/nbl/asset/utils/CPolygonGeometryManipulator.h index 4a31bd6a95..c3bed0e49e 100644 --- a/include/nbl/asset/utils/CPolygonGeometryManipulator.h +++ b/include/nbl/asset/utils/CPolygonGeometryManipulator.h @@ -128,27 +128,28 @@ class NBL_API2 CPolygonGeometryManipulator const_cast(geo->getAABBStorage()) = computeAABB(geo); } - static inline core::smart_refctd_ptr createTriangleListIndexing(const ICPUPolygonGeometry* geo) + static inline core::smart_refctd_ptr createTriangleListIndexing(const ICPUPolygonGeometry* geo, const bool reverse=false, const bool 
recomputeHash=true) { const auto* indexing = geo->getIndexingCallback(); - if (!indexing) return nullptr; - if (indexing->degree() != 3) return nullptr; + if (!indexing || indexing->degree()!=3) // TODO: why just triangle? + return nullptr; const auto originalView = geo->getIndexView(); - const auto originalIndexSize = originalView ? originalView.composed.stride : 0; + const auto originalIndexSize = originalView ? originalView.composed.stride:0; const auto primCount = geo->getPrimitiveCount(); const auto maxIndex = geo->getPositionView().getElementCount() - 1; const uint8_t indexSize = maxIndex <= std::numeric_limits::max() ? sizeof(uint16_t) : sizeof(uint32_t); const auto outGeometry = core::move_and_static_cast(geo->clone(0u)); - if (indexing && indexing->knownTopology() == EPT_TRIANGLE_LIST) + if (indexing->knownTopology()==EPT_TRIANGLE_LIST && !reverse) return outGeometry; auto* outGeo = outGeometry.get(); const auto indexBufferUsages = [&] { - if (originalView) return originalView.src.buffer->getUsageFlags(); + if (originalView) + return originalView.src.buffer->getUsageFlags(); return core::bitflag(IBuffer::EUF_INDEX_BUFFER_BIT); }(); auto indexBuffer = ICPUBuffer::create({ primCount * indexing->degree() * indexSize, indexBufferUsages }); @@ -173,7 +174,8 @@ class NBL_API2 CPolygonGeometryManipulator .indexSize = originalIndexSize, .beginPrimitive = 0, .endPrimitive = primCount, - .out = indexBufferPtr, + .reversePrims = reverse, + .out = indexBufferPtr }; indexing->operator()(context); @@ -190,7 +192,8 @@ class NBL_API2 CPolygonGeometryManipulator .indexSize = originalIndexSize, .beginPrimitive = 0, .endPrimitive = primCount, - .out = indexBufferPtr, + .reversePrims = reverse, + .out = indexBufferPtr }; indexing->operator()(context); @@ -209,11 +212,35 @@ class NBL_API2 CPolygonGeometryManipulator outGeo->setIndexing(IPolygonGeometryBase::TriangleList()); outGeo->setIndexView(std::move(indexView)); - 
CGeometryManipulator::recomputeContentHash(outGeo->getIndexView()); + + if (recomputeHash) + CGeometryManipulator::recomputeContentHash(outGeo->getIndexView()); return outGeometry; } + template + requires (std::same_as, hlsl::float32_t3>) + static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon = 1.525e-5f) + { + return COBBGenerator::compute(vertexCount, std::forward(fetchFn), epsilon); + } + + static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse=false, const bool recomputeHash=true); + + using SSNGVertexData = CSmoothNormalGenerator::VertexData; + using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; + // NOTE: Requires unwelded mesh on input, TODO make it resillient against that (only unweld normals temporarily, maybe even avoid position unweld) + static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, const bool enableWelding=false, float epsilon=1.525e-5f, + SSNGVxCmpFunction vxcmp=[](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + { + constexpr float cosOf45Deg = 0.70710678118f; + return hlsl::dot(v0.weightedNormal,v1.weightedNormal)*hlsl::rsqrt(hlsl::dot(v0.weightedNormal,v0.weightedNormal)*hlsl::dot(v1.weightedNormal,v1.weightedNormal)) > cosOf45Deg; + }, + const bool recomputeHash = true + ); + + //! 
Comparison methods enum E_ERROR_METRIC { @@ -232,26 +259,6 @@ class NBL_API2 CPolygonGeometryManipulator EEM_QUATERNION, EEM_COUNT }; - - template - requires (std::same_as, hlsl::float32_t3>) - static inline hlsl::shapes::OBB<3, hlsl::float32_t> calculateOBB(size_t vertexCount, FetchVertexFn&& fetchFn, float epsilon = 1.525e-5f) - { - return COBBGenerator::compute(vertexCount, std::forward(fetchFn), epsilon); - } - - static core::smart_refctd_ptr createUnweldedList(const ICPUPolygonGeometry* inGeo); - - using SSNGVertexData = CSmoothNormalGenerator::VertexData; - using SSNGVxCmpFunction = CSmoothNormalGenerator::VxCmpFunction; - - static core::smart_refctd_ptr createSmoothVertexNormal(const ICPUPolygonGeometry* inbuffer, bool enableWelding = false, float epsilon = 1.525e-5f, - SSNGVxCmpFunction vxcmp = [](const SSNGVertexData& v0, const SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) - { - constexpr float cosOf45Deg = 0.70710678118f; - return dot(normalize(v0.weightedNormal),normalize(v1.weightedNormal)) > cosOf45Deg; - }); - #if 0 // TODO: REDO //! Struct used to pass chosen comparison method and epsilon to functions performing error metrics. /** @@ -385,24 +392,6 @@ class NBL_API2 CPolygonGeometryManipulator */ static core::smart_refctd_ptr idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - //! Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle strip. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - - //! 
Creates index buffer from input converting it to indices for triangle list primitives. Input is assumed to be indices for triangle fan. - /** - @param _input Input index buffer's data. - @param _idxCount Index count. - @param _inIndexType Type of input index buffer data (32bit or 16bit). - @param _outIndexType Type of output index buffer data (32bit or 16bit). - */ - static core::smart_refctd_ptr idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType); - //! static inline std::array getTriangleIndices(const ICPUMeshBuffer* mb, uint32_t triangleIx) { @@ -606,40 +595,6 @@ class NBL_API2 CPolygonGeometryManipulator return aabb; } - //! Recalculates the cached bounding box of the meshbuffer - static inline void recalculateBoundingBox(ICPUMeshBuffer* meshbuffer) - { - meshbuffer->setBoundingBox(calculateBoundingBox(meshbuffer,meshbuffer->getJointAABBs())); - } - - //! Flips the direction of surfaces. - /** Changes backfacing triangles to frontfacing - triangles and vice versa. - \param mesh Mesh on which the operation is performed. */ - static void flipSurfaces(ICPUMeshBuffer* inbuffer); - - //! Creates a copy of a mesh with all vertices unwelded - /** \param mesh Input mesh - \return Mesh consisting only of unique faces. All vertices - which were previously shared are now duplicated. */ - static core::smart_refctd_ptr createMeshBufferUniquePrimitives(ICPUMeshBuffer* inbuffer, bool _makeIndexBuf = false); - - // - static core::smart_refctd_ptr calculateSmoothNormals(ICPUMeshBuffer* inbuffer, bool makeNewMesh = false, float epsilon = 1.525e-5f, - uint32_t normalAttrID = 3u, - VxCmpFunction vxcmp = [](const IMeshManipulator::SSNGVertexData& v0, const IMeshManipulator::SSNGVertexData& v1, ICPUMeshBuffer* buffer) - { - static constexpr float cosOf45Deg = 0.70710678118f; - return dot(v0.parentTriangleFaceNormal,v1.parentTriangleFaceNormal)[0] > cosOf45Deg; - }); - - - //! 
Creates a copy of a mesh with vertices welded - /** \param mesh Input mesh - \param errMetrics Array of size EVAI_COUNT. Describes error metric for each vertex attribute (used if attribute is of floating point or normalized type). - \param tolerance The threshold for vertex comparisons. - \return Mesh without redundant vertices. */ - static core::smart_refctd_ptr createMeshBufferWelded(ICPUMeshBuffer *inbuffer, const SErrorMetric* errMetrics, const bool& optimIndexType = true, const bool& makeNewMesh = false); //! Throws meshbuffer into full optimizing pipeline consisting of: vertices welding, z-buffer optimization, vertex cache optimization (Forsyth's algorithm), fetch optimization and attributes requantization. A new meshbuffer is created unless given meshbuffer doesn't own (getMeshDataAndFormat()==NULL) a data format descriptor. /**@return A new meshbuffer or NULL if an error occured. */ @@ -864,45 +819,6 @@ class CMeshManipulator : public IMeshManipulator return output; } - template - static inline core::smart_refctd_ptr triangleStripsToTriangles(const void* _input, uint32_t& _idxCount) - { - const auto outputSize = _idxCount = (_idxCount - 2) * 3; - - auto output = ICPUBuffer::create({ sizeof(OutType)*outputSize }); - const auto* iptr = reinterpret_cast(_input); - auto* optr = reinterpret_cast(output->getPointer()); - for (uint32_t i = 0, j = 0; i < outputSize; j += 2) - { - optr[i++] = iptr[j + 0]; - optr[i++] = iptr[j + 1]; - optr[i++] = iptr[j + 2]; - if (i == outputSize) - break; - optr[i++] = iptr[j + 2]; - optr[i++] = iptr[j + 1]; - optr[i++] = iptr[j + 3]; - } - return output; - } - - template - static inline core::smart_refctd_ptr trianglesFanToTriangles(const void* _input, uint32_t& _idxCount) - { - const auto outputSize = _idxCount = (_idxCount - 2) * 3; - - auto output = ICPUBuffer::create({ sizeof(OutType)*outputSize }); - const auto* iptr = reinterpret_cast(_input); - auto* optr = reinterpret_cast(output->getPointer()); - for (uint32_t i = 0, j = 
1; i < outputSize;) - { - optr[i++] = iptr[0]; - optr[i++] = iptr[j++]; - optr[i++] = iptr[j]; - } - return output; - } - private: CQuantNormalCache quantNormalCache; CQuantQuaternionCache quantQuaternionCache; diff --git a/include/nbl/asset/utils/CVertexWelder.h b/include/nbl/asset/utils/CVertexWelder.h index 7f6065e2a9..21d4236438 100644 --- a/include/nbl/asset/utils/CVertexWelder.h +++ b/include/nbl/asset/utils/CVertexWelder.h @@ -6,350 +6,355 @@ #include "nbl/asset/utils/CPolygonGeometryManipulator.h" -namespace nbl::asset { +namespace nbl::asset +{ template concept VertexWelderAccelerationStructure = requires(T const cobj, hlsl::float32_t3 position, std::function fn) { - typename T::vertex_data_t; - { std::same_as }; - { cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; + typename T::vertex_data_t; + { std::same_as }; + { cobj.forEachBroadphaseNeighborCandidates(position, fn) } -> std::same_as; }; -class CVertexWelder { - - public: - - class WeldPredicate - { - public: - virtual bool init(const ICPUPolygonGeometry* geom) = 0; - virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; - virtual ~WeldPredicate() = default; - }; - - class DefaultWeldPredicate : public WeldPredicate - { - private: - - struct SDataViewContext +class CVertexWelder +{ + public: + class WeldPredicate { - uint32_t channelCount : 3; - uint32_t byteSize: 29; + public: + virtual bool init(const ICPUPolygonGeometry* geom) = 0; + virtual bool operator()(const ICPUPolygonGeometry* geom, uint32_t idx1, uint32_t idx2) const = 0; + virtual ~WeldPredicate() = default; }; - static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) + class DefaultWeldPredicate final : public WeldPredicate { - const auto* basePtr = reinterpret_cast(view.getPointer()); - const auto stride = view.composed.stride; - return (memcmp(basePtr + (index1 * stride), basePtr + 
(index2 * stride), byteSize) == 0); - } - - static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) - { - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - for (auto channel_i = 0u; channel_i < channelCount; channel_i++) - { - const auto diff = abs(val1[channel_i] - val2[channel_i]); - if (diff > epsilon) return false; - } - return true; - } - - static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) - { - if (context.byteSize == 0) return true; - - assert(view); - assert(view.composed.isFormatted()); - assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - return isIntegralElementEqual(view, index1, index2, context.byteSize); - } - default: - { - return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); - } - } - return true; - } - - static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) - { - if (context.byteSize == 0) return true; - - assert(view); - assert(view.composed.isFormatted()); - assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); - switch (view.composed.rangeFormat) - { - case IGeometryBase::EAABBFormat::U64: - case IGeometryBase::EAABBFormat::U32: - case IGeometryBase::EAABBFormat::S64: - case IGeometryBase::EAABBFormat::S32: - { - return isIntegralElementEqual(view, index1, index2, context.byteSize); - } - default: - { - if (context.channelCount != 3) - return 
isRealElementEqual(view, index1, index2, context.channelCount, epsilon); - - hlsl::float64_t4 val1, val2; - view.decodeElement(index1, val1); - view.decodeElement(index2, val2); - return (1.0 - hlsl::dot(val1, val2)) < epsilon; - } - } - } - - float m_epsilon; - - SDataViewContext m_positionViewContext; - SDataViewContext m_normalViewContext; - - struct SJointViewContext - { - SDataViewContext indices; - SDataViewContext weights; + struct SDataViewContext + { + uint32_t channelCount : 3; + uint32_t byteSize: 29; + }; + + static inline bool isIntegralElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t byteSize) + { + const auto* basePtr = reinterpret_cast(view.getPointer()); + const auto stride = view.composed.stride; + return (memcmp(basePtr + (index1 * stride), basePtr + (index2 * stride), byteSize) == 0); + } + + static inline bool isRealElementEqual(const ICPUPolygonGeometry::SDataView& view, uint32_t index1, uint32_t index2, uint32_t channelCount, float epsilon) + { + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + for (auto channel_i = 0u; channel_i < channelCount; channel_i++) + { + const auto diff = abs(val1[channel_i] - val2[channel_i]); + if (diff > epsilon) return false; + } + return true; + } + + static inline bool isAttributeValEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + 
return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); + } + } + return true; + } + + static inline bool isAttributeDirEqual(const ICPUPolygonGeometry::SDataView& view, const SDataViewContext& context, uint32_t index1, uint32_t index2, float epsilon) + { + if (context.byteSize == 0) return true; + + assert(view); + assert(view.composed.isFormatted()); + assert(IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat); + switch (view.composed.rangeFormat) + { + case IGeometryBase::EAABBFormat::U64: + case IGeometryBase::EAABBFormat::U32: + case IGeometryBase::EAABBFormat::S64: + case IGeometryBase::EAABBFormat::S32: + { + return isIntegralElementEqual(view, index1, index2, context.byteSize); + } + default: + { + if (context.channelCount != 3) + return isRealElementEqual(view, index1, index2, context.channelCount, epsilon); + + hlsl::float64_t4 val1, val2; + view.decodeElement(index1, val1); + view.decodeElement(index2, val2); + return (1.0 - hlsl::dot(val1, val2)) < epsilon; + } + } + } + + float m_epsilon; + + SDataViewContext m_positionViewContext; + SDataViewContext m_normalViewContext; + + struct SJointViewContext + { + SDataViewContext indices; + SDataViewContext weights; + }; + core::vector m_jointViewContexts; + + core::vector m_auxAttributeViewContexts; + + public: + inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} + + inline bool init(const ICPUPolygonGeometry* polygon) override + { + auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) + { + return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; + }; + auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext + { + if (!view) + { + return { + .channelCount = 0, + .byteSize = 0 + }; + } + return { + .channelCount = getFormatChannelCount(view.composed.format), + .byteSize = 
getTexelOrBlockBytesize(view.composed.format) + }; + }; + + if (!polygon->valid()) + return false; + + const auto& positionView = polygon->getPositionView(); + if (IGeometryBase::getMatchingAABBFormat(positionView.composed.format) != positionView.composed.rangeFormat) + return false; + m_positionViewContext = { + .channelCount = getFormatChannelCount(positionView.composed.format), + .byteSize = getTexelOrBlockBytesize(positionView.composed.format), + }; + + const auto& normalView = polygon->getNormalView(); + if (normalView && !isViewFormatValid(normalView)) + return false; + m_normalViewContext = getViewContext(normalView); + + m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); + for (const auto& jointWeightView : polygon->getJointWeightViews()) + { + if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) + return false; + if (jointWeightView.weights && !isViewFormatValid(jointWeightView.weights)) + return false; + m_jointViewContexts.push_back({ + .indices = getViewContext(jointWeightView.indices), + .weights = getViewContext(jointWeightView.weights), + }); + } + + m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); + for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) + { + if (auxAttributeView && !isViewFormatValid(auxAttributeView)) + return false; + m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); + } + + return true; + } + + inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override + { + if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) + return false; + + const auto& normalView = polygon->getNormalView(); + if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) + return false; + + for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) + { + const auto& jointWeightView = 
polygon->getJointWeightViews()[joint_i]; + if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) + return false; + if (!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) + return false; + } + + const auto& auxAttrViews = polygon->getAuxAttributeViews(); + for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) + if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) + return false; + + return true; + } + + inline ~DefaultWeldPredicate() override = default; }; - core::vector m_jointViewContexts; - - core::vector m_auxAttributeViewContexts; - - public: - - inline DefaultWeldPredicate(float epsilon) : m_epsilon(epsilon) {} - inline bool init(const ICPUPolygonGeometry* polygon) override + template + static inline core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicate& shouldWeldFn, const bool recomputeHash=true) { - auto isViewFormatValid = [](const ICPUPolygonGeometry::SDataView& view) - { - return view.composed.isFormatted() && IGeometryBase::getMatchingAABBFormat(view.composed.format) == view.composed.rangeFormat; - }; - auto getViewContext = [](const ICPUPolygonGeometry::SDataView& view) -> SDataViewContext - { - if (!view) - { - return { - .channelCount = 0, - .byteSize = 0 + auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); + + if (!shouldWeldFn.init(polygon)) + return nullptr; + + const auto& positionView = polygon->getPositionView(); + const auto vertexCount = positionView.getElementCount(); + + constexpr auto INVALID_INDEX = std::numeric_limits::max(); + core::vector remappedVertexIndexes(vertexCount); + + uint32_t maxRemappedIndex = 0; + // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together + for (uint32_t index = 0; index < vertexCount; index++) + { + 
hlsl::float32_t3 position; + positionView.decodeElement(index, position); + auto remappedVertexIndex = INVALID_INDEX; + as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { + const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; + // make sure we can only map higher indices to lower indices to disallow loops + if (candidate.indexgetIndexView(); + const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; + + auto createRemappedIndexView = [&](size_t indexCount) { + const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? sizeof(uint16_t) : sizeof(uint32_t); + auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); + auto remappedIndexView = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = indexSize, + .rangeFormat = remappedRangeFormat + }, + .src = { + .offset = 0, + .size = remappedIndexBuffer->getSize(), + .buffer = std::move(remappedIndexBuffer) + } + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) + { + hlsl::shapes::AABB<4, uint16_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u16 = aabb; + remappedIndexView.composed.format = EF_R16_UINT; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + hlsl::shapes::AABB<4, uint32_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxRemappedIndex; + remappedIndexView.composed.encodedDataRange.u32 = aabb; + remappedIndexView.composed.format = EF_R32_UINT; + } + + return remappedIndexView; }; - } - return { - .channelCount = getFormatChannelCount(view.composed.format), - .byteSize = getTexelOrBlockBytesize(view.composed.format) - }; - }; - - if (!polygon->valid()) return false; - - const auto& positionView = polygon->getPositionView(); - if 
(IGeometryBase::getMatchingAABBFormat(positionView.composed.format) != positionView.composed.rangeFormat) return false; - m_positionViewContext = { - .channelCount = getFormatChannelCount(positionView.composed.format), - .byteSize = getTexelOrBlockBytesize(positionView.composed.format), - }; - - const auto& normalView = polygon->getNormalView(); - if (normalView && !isViewFormatValid(normalView)) return false; - m_normalViewContext = getViewContext(normalView); - - m_jointViewContexts.reserve(polygon->getJointWeightViews().size()); - for (const auto& jointWeightView : polygon->getJointWeightViews()) - { - if (jointWeightView.indices && !isViewFormatValid(jointWeightView.indices)) return false; - if (jointWeightView.weights && !isViewFormatValid(jointWeightView.weights)) return false; - m_jointViewContexts.push_back({ - .indices = getViewContext(jointWeightView.indices), - .weights = getViewContext(jointWeightView.weights), - }); - } - - m_auxAttributeViewContexts.reserve(polygon->getAuxAttributeViews().size()); - for (const auto& auxAttributeView : polygon->getAuxAttributeViews()) - { - if (auxAttributeView && !isViewFormatValid(auxAttributeView)) return false; - m_auxAttributeViewContexts.push_back(getViewContext(auxAttributeView)); - } - - return true; - } - inline bool operator()(const ICPUPolygonGeometry* polygon, uint32_t index1, uint32_t index2) const override - { - if (!isAttributeValEqual(polygon->getPositionView(), m_positionViewContext, index1, index2, m_epsilon)) - return false; - - const auto& normalView = polygon->getNormalView(); - if (!isAttributeDirEqual(normalView, m_normalViewContext, index1, index2, m_epsilon)) - return false; - - for (uint64_t joint_i = 0; joint_i < polygon->getJointWeightViews().size(); joint_i++) - { - const auto& jointWeightView = polygon->getJointWeightViews()[joint_i]; - if (!isAttributeValEqual(jointWeightView.indices, m_jointViewContexts[joint_i].indices, index1, index2, m_epsilon)) return false; - if 
(!isAttributeValEqual(jointWeightView.weights, m_jointViewContexts[joint_i].weights, index1, index2, m_epsilon)) return false; - } - - const auto& auxAttrViews = polygon->getAuxAttributeViews(); - for (uint64_t aux_i = 0; aux_i < auxAttrViews.size(); aux_i++) - { - if (!isAttributeValEqual(auxAttrViews[aux_i], m_auxAttributeViewContexts[aux_i], index1, index2, m_epsilon)) return false; - } - - return true; + if (indexView) + { + auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); + auto remappedIndexes = [&]() -> bool { + auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) + { + hlsl::vector index; + indexView.decodeElement>(index_i, index); + const auto remappedIndex = remappedVertexIndexes[index.x]; + remappedIndexPtr[index_i] = static_cast(remappedIndex); + if (remappedIndex == INVALID_INDEX) return false; + } + return true; + }; + + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!remappedIndexes.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!remappedIndexes.template operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + + } else + { + auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); + + auto fillRemappedIndex = [&](){ + auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); + for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) + { + if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; + remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; + } + return true; + }; + if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { + if (!fillRemappedIndex.template operator()()) return nullptr; + } + else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { + if (!fillRemappedIndex.template 
operator()()) return nullptr; + } + + outPolygon->setIndexView(std::move(remappedIndexView)); + } + + if (recomputeHash) + CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); + return outPolygon; } - - inline ~DefaultWeldPredicate() override = default; - - }; - - template - static inline core::smart_refctd_ptr weldVertices(const ICPUPolygonGeometry* polygon, const AccelStructureT& as, WeldPredicate& shouldWeldFn) { - auto outPolygon = core::move_and_static_cast(polygon->clone(0u)); - - if (!shouldWeldFn.init(polygon)) return nullptr; - - const auto& positionView = polygon->getPositionView(); - const auto vertexCount = positionView.getElementCount(); - - constexpr auto INVALID_INDEX = std::numeric_limits::max(); - core::vector remappedVertexIndexes(vertexCount); - - uint32_t maxRemappedIndex = 0; - // iterate by index, so that we always use the smallest index when multiple vertexes can be welded together - for (uint32_t index = 0; index < vertexCount; index++) - { - hlsl::float32_t3 position; - positionView.decodeElement(index, position); - auto remappedVertexIndex = INVALID_INDEX; - as.forEachBroadphaseNeighborCandidates(position, [&](const typename AccelStructureT::vertex_data_t& candidate) { - const auto neighborRemappedIndex = remappedVertexIndexes[candidate.index]; - // make sure we can only map higher indices to lower indices to disallow loops - if (candidate.indexgetIndexView(); - const auto remappedRangeFormat = (maxRemappedIndex - 1) < static_cast(std::numeric_limits::max()) ? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32; - - auto createRemappedIndexView = [&](size_t indexCount) { - const uint32_t indexSize = remappedRangeFormat == IGeometryBase::EAABBFormat::U16 ? 
sizeof(uint16_t) : sizeof(uint32_t); - auto remappedIndexBuffer = ICPUBuffer::create({indexSize * indexCount, IBuffer::EUF_INDEX_BUFFER_BIT}); - auto remappedIndexView = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = indexSize, - .rangeFormat = remappedRangeFormat - }, - .src = { - .offset = 0, - .size = remappedIndexBuffer->getSize(), - .buffer = std::move(remappedIndexBuffer) - } - }; - - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) - { - hlsl::shapes::AABB<4, uint16_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = maxRemappedIndex; - remappedIndexView.composed.encodedDataRange.u16 = aabb; - remappedIndexView.composed.format = EF_R16_UINT; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - hlsl::shapes::AABB<4, uint32_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = maxRemappedIndex; - remappedIndexView.composed.encodedDataRange.u32 = aabb; - remappedIndexView.composed.format = EF_R32_UINT; - } - - return remappedIndexView; - }; - - - if (indexView) - { - auto remappedIndexView = createRemappedIndexView(polygon->getIndexCount()); - auto remappedIndexes = [&]() -> bool { - auto* remappedIndexPtr = reinterpret_cast(remappedIndexView.getPointer()); - for (uint32_t index_i = 0; index_i < polygon->getIndexCount(); index_i++) - { - hlsl::vector index; - indexView.decodeElement>(index_i, index); - const auto remappedIndex = remappedVertexIndexes[index.x]; - remappedIndexPtr[index_i] = static_cast(remappedIndex); - if (remappedIndex == INVALID_INDEX) return false; - } - return true; - }; - - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - if (!remappedIndexes.template operator()()) return nullptr; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - if (!remappedIndexes.template operator()()) return nullptr; - } - - outPolygon->setIndexView(std::move(remappedIndexView)); - - } else - { - auto remappedIndexView = createRemappedIndexView(remappedVertexIndexes.size()); - - auto 
fillRemappedIndex = [&](){ - auto remappedIndexBufferPtr = reinterpret_cast(remappedIndexView.getPointer()); - for (uint32_t index_i = 0; index_i < remappedVertexIndexes.size(); index_i++) - { - if (remappedVertexIndexes[index_i] == INVALID_INDEX) return false; - remappedIndexBufferPtr[index_i] = remappedVertexIndexes[index_i]; - } - return true; - }; - if (remappedRangeFormat == IGeometryBase::EAABBFormat::U16) { - if (!fillRemappedIndex.template operator()()) return nullptr; - } - else if (remappedRangeFormat == IGeometryBase::EAABBFormat::U32) { - if (!fillRemappedIndex.template operator()()) return nullptr; - } - - outPolygon->setIndexView(std::move(remappedIndexView)); - } - - CGeometryManipulator::recomputeContentHash(outPolygon->getIndexView()); - return outPolygon; - } }; } diff --git a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp index b4f2f2ef06..818751052b 100644 --- a/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp +++ b/src/nbl/asset/utils/CPolygonGeometryManipulator.cpp @@ -20,257 +20,163 @@ namespace nbl::asset { -core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo) +core::smart_refctd_ptr CPolygonGeometryManipulator::createUnweldedList(const ICPUPolygonGeometry* inGeo, const bool reverse, const bool recomputeHash) { - const auto* indexing = inGeo->getIndexingCallback(); - if (!indexing) - return nullptr; + const auto* indexing = inGeo->getIndexingCallback(); + if (!indexing) + return nullptr; - const auto indexView = inGeo->getIndexView(); - const auto primCount = inGeo->getPrimitiveCount(); - const uint8_t degree = indexing->degree(); - const auto outIndexCount = primCount*degree; - if (outIndexCountgetIndexView(); + const auto primCount = inGeo->getPrimitiveCount(); + const uint8_t degree = indexing->degree(); + const auto outIndexCount = primCount*degree; + if (outIndexCount(inGeo->clone(0u)); + const auto outGeometry = 
core::move_and_static_cast(inGeo->clone(0u)); - auto* outGeo = outGeometry.get(); - outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); + auto* outGeo = outGeometry.get(); + outGeo->setIndexing(IPolygonGeometryBase::NGonList(degree)); - auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView - { - if (!inView) - return {}; - auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); - return { - .composed = inView.composed, - .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} - }; + auto createOutView = [&](const ICPUPolygonGeometry::SDataView& inView) -> ICPUPolygonGeometry::SDataView + { + if (!inView) + return {}; + auto buffer = ICPUBuffer::create({ outIndexCount*inView.composed.stride , inView.src.buffer->getUsageFlags() }); + return { + .composed = inView.composed, + .src = {.offset = 0, .size = buffer->getSize(), .buffer = std::move(buffer)} }; + }; - const auto inIndexView = inGeo->getIndexView(); - auto outIndexView = createOutView(inIndexView); - auto indexBuffer = outIndexView.src.buffer; - const auto indexSize = inIndexView.composed.stride; - std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); - outGeo->setIndexView({}); - - const auto inVertexView = inGeo->getPositionView(); - auto outVertexView = createOutView(inVertexView); - auto vertexBuffer = outVertexView.src.buffer; - const auto vertexSize = inVertexView.composed.stride; - const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); - std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); - outGeo->setPositionView(std::move(outVertexView)); - - const auto inNormalView = inGeo->getNormalView(); - const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); - auto outNormalView = createOutView(inNormalView); - auto outNormalBuffer = outNormalView.src.buffer; - 
outGeo->setNormalView(std::move(outNormalView)); - - outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; - auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); - outJointWeightView.indices = createOutView(inJointWeightView.indices); - outJointWeightView.weights = createOutView(inJointWeightView.weights); - } + const auto inIndexView = inGeo->getIndexView(); + auto outIndexView = createOutView(inIndexView); + auto indexBuffer = outIndexView.src.buffer; + const auto indexSize = inIndexView.composed.stride; + std::byte* outIndices = reinterpret_cast(outIndexView.getPointer()); + outGeo->setIndexView({}); + + const auto inVertexView = inGeo->getPositionView(); + auto outVertexView = createOutView(inVertexView); + auto vertexBuffer = outVertexView.src.buffer; + const auto vertexSize = inVertexView.composed.stride; + const std::byte* inVertices = reinterpret_cast(inVertexView.getPointer()); + std::byte* const outVertices = reinterpret_cast(vertexBuffer->getPointer()); + outGeo->setPositionView(std::move(outVertexView)); + + const auto inNormalView = inGeo->getNormalView(); + const std::byte* const inNormals = reinterpret_cast(inNormalView.getPointer()); + auto outNormalView = createOutView(inNormalView); + auto outNormalBuffer = outNormalView.src.buffer; + outGeo->setNormalView(std::move(outNormalView)); + + outGeometry->getJointWeightViews()->resize(inGeo->getJointWeightViews().size()); + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inJointWeightView = inGeo->getJointWeightViews()[jointView_i]; + auto& outJointWeightView = outGeometry->getJointWeightViews()->operator[](jointView_i); + outJointWeightView.indices = createOutView(inJointWeightView.indices); + 
outJointWeightView.weights = createOutView(inJointWeightView.weights); + } - outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); + outGeometry->getAuxAttributeViews()->resize(inGeo->getAuxAttributeViews().size()); + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + outGeo->getAuxAttributeViews()->operator[](auxView_i) = createOutView(inGeo->getAuxAttributeViews()[auxView_i]); - std::array indices; - for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + std::array indices; + for (uint64_t prim_i = 0u; prim_i < primCount; prim_i++) + { + IPolygonGeometryBase::IIndexingCallback::SContext context{ + .indexBuffer = indexView.getPointer(), + .indexSize = indexView.composed.stride, + .beginPrimitive = prim_i, + .endPrimitive = prim_i + 1, + .reversePrims = reverse, + .out = indices.data() + }; + indexing->operator()(context); + for (uint8_t primIndex_i=0; primIndex_i context{ - .indexBuffer = indexView.getPointer(), - .indexSize = indexView.composed.stride, - .beginPrimitive = prim_i, - .endPrimitive = prim_i + 1, - .out = indices.data() - }; - indexing->operator()(context); - for (uint8_t primIndex_i=0; primIndex_i(outNormalBuffer->getPointer()); - const auto normalSize = inNormalView.composed.stride; - memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); - } - - for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) - { - auto& inView = inGeo->getJointWeightViews()[jointView_i]; - auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); - - const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); - const auto jointIndexSize = inView.indices.composed.stride; 
- std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); - memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); - - const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); - const auto jointWeightSize = inView.weights.composed.stride; - std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); - memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); - } - - for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) - { - auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; - auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); - const auto attrSize = inView.composed.stride; - const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); - std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); - memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); - } - } + const auto outIndex = prim_i * degree + primIndex_i; + const auto inIndex = indices[primIndex_i]; + // TODO: these memcpys from view to view could really be DRY-ed and lambdified + memcpy(outIndices + outIndex * indexSize, &outIndex, indexSize); + memcpy(outVertices + outIndex * vertexSize, inVertices + inIndex * vertexSize, vertexSize); + if (inNormalView) + { + std::byte* const outNormals = reinterpret_cast(outNormalBuffer->getPointer()); + const auto normalSize = inNormalView.composed.stride; + memcpy(outNormals + outIndex * normalSize, inNormals + inIndex * normalSize, normalSize); + } + + for (uint64_t jointView_i = 0u; jointView_i < inGeo->getJointWeightViews().size(); jointView_i++) + { + auto& inView = inGeo->getJointWeightViews()[jointView_i]; + auto& outView = outGeometry->getJointWeightViews()->operator[](jointView_i); + + const std::byte* const inJointIndices = reinterpret_cast(inView.indices.getPointer()); + const 
auto jointIndexSize = inView.indices.composed.stride; + std::byte* const outJointIndices = reinterpret_cast(outView.indices.getPointer()); + memcpy(outJointIndices + outIndex * jointIndexSize, inJointIndices + inIndex * jointIndexSize, jointIndexSize); + + const std::byte* const inWeights = reinterpret_cast(inView.weights.getPointer()); + const auto jointWeightSize = inView.weights.composed.stride; + std::byte* const outWeights = reinterpret_cast(outView.weights.getPointer()); + memcpy(outWeights + outIndex * jointWeightSize, outWeights + inIndex * jointWeightSize, jointWeightSize); + } + + for (uint64_t auxView_i = 0u; auxView_i < inGeo->getAuxAttributeViews().size(); auxView_i++) + { + auto& inView = inGeo->getAuxAttributeViews()[auxView_i]; + auto& outView = outGeometry->getAuxAttributeViews()->operator[](auxView_i); + const auto attrSize = inView.composed.stride; + const std::byte* const inAuxs = reinterpret_cast(inView.getPointer()); + std::byte* const outAuxs = reinterpret_cast(outView.getPointer()); + memcpy(outAuxs + outIndex * attrSize, inAuxs + inIndex * attrSize, attrSize); + } } + } + + if (recomputeHash) + recomputeContentHashes(outGeo); - recomputeContentHashes(outGeo); - return outGeometry; + return outGeometry; } -core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp) +core::smart_refctd_ptr CPolygonGeometryManipulator::createSmoothVertexNormal(const ICPUPolygonGeometry* inPolygon, bool enableWelding, float epsilon, SSNGVxCmpFunction vxcmp, const bool recomputeHash) { - if (!inPolygon) - { - _NBL_DEBUG_BREAK_IF(true); - return nullptr; - } - - // Mesh need to be unwelded (TODO: why? 
the output only need to be unwelded, really should be checking `inPolygon->getIndexingCallback()->count()!=3`) - if (inPolygon->getIndexView() && inPolygon->getIndexingCallback()!=IPolygonGeometryBase::TriangleList()) - { + if (!inPolygon) + { _NBL_DEBUG_BREAK_IF(true); return nullptr; - } + } - auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp); - if (enableWelding) - { - auto weldPredicate = CVertexWelder::DefaultWeldPredicate(epsilon); - return CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, weldPredicate); - } - return result.geom; -} + if (!inPolygon->getIndexingCallback() && inPolygon->getIndexingCallback()->degree()!=3) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } -#if 0 -//! Flips the direction of surfaces. Changes backfacing triangles to frontfacing -//! triangles and vice versa. -//! \param mesh: Mesh on which the operation is performed. -void IMeshManipulator::flipSurfaces(ICPUMeshBuffer* inbuffer) -{ - if (!inbuffer) - return; - auto* pipeline = inbuffer->getPipeline(); - const E_PRIMITIVE_TOPOLOGY primType = pipeline->getCachedCreationParams().primitiveAssembly.primitiveType; + // right now we can't handle this, see TODOs in CSmoothNormalGenerator + if (inPolygon->getIndexView()) + { + _NBL_DEBUG_BREAK_IF(true); + return nullptr; + } - const uint32_t idxcnt = inbuffer->getIndexCount(); - if (!inbuffer->getIndices()) - return; + auto result = CSmoothNormalGenerator::calculateNormals(inPolygon, epsilon, vxcmp, false); + if (enableWelding) + { + auto weldPredicate = CVertexWelder::DefaultWeldPredicate(epsilon); + result.geom = CVertexWelder::weldVertices(result.geom.get(), result.vertexHashGrid, weldPredicate, false); + } - if (inbuffer->getIndexType() == EIT_16BIT) - { - uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); - switch (primType) - { - case EPT_TRIANGLE_FAN: - for (uint32_t i = 1; i < idxcnt; i += 2) - { - const uint16_t tmp = idx[i]; - idx[i] = idx[i + 1]; - idx[i + 1] = 
tmp; - } - break; - case EPT_TRIANGLE_STRIP: - if (idxcnt % 2) //odd - { - for (uint32_t i = 0; i < (idxcnt >> 1); i++) - { - const uint16_t tmp = idx[i]; - idx[i] = idx[idxcnt - 1 - i]; - idx[idxcnt - 1 - i] = tmp; - } - } - else //even - { - auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint16_t) }); - auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); - destPtr[0] = idx[0]; - memcpy(destPtr + 1u, idx, sizeof(uint16_t) * idxcnt); - inbuffer->setIndexCount(idxcnt + 1u); - SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; - inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); - } - break; - case EPT_TRIANGLE_LIST: - for (uint32_t i = 0; i < idxcnt; i += 3) - { - const uint16_t tmp = idx[i + 1]; - idx[i + 1] = idx[i + 2]; - idx[i + 2] = tmp; - } - break; - default: break; - } - } - else if (inbuffer->getIndexType() == EIT_32BIT) - { - uint32_t* idx = reinterpret_cast(inbuffer->getIndices()); - switch (primType) - { - case EPT_TRIANGLE_FAN: - for (uint32_t i = 1; i < idxcnt; i += 2) - { - const uint32_t tmp = idx[i]; - idx[i] = idx[i + 1]; - idx[i + 1] = tmp; - } - break; - case EPT_TRIANGLE_STRIP: - if (idxcnt % 2) //odd - { - for (uint32_t i = 0; i < (idxcnt >> 1); i++) - { - const uint32_t tmp = idx[i]; - idx[i] = idx[idxcnt - 1 - i]; - idx[idxcnt - 1 - i] = tmp; - } - } - else //even - { - auto newIndexBuffer = ICPUBuffer::create({ (idxcnt + 1u) * sizeof(uint32_t) }); - auto* destPtr = reinterpret_cast(newIndexBuffer->getPointer()); - destPtr[0] = idx[0]; - memcpy(destPtr + 1u, idx, sizeof(uint32_t) * idxcnt); - inbuffer->setIndexCount(idxcnt + 1); - SBufferBinding ixBufBinding{ 0u, std::move(newIndexBuffer) }; - inbuffer->setIndexBufferBinding(std::move(ixBufBinding)); - } - break; - case EPT_TRIANGLE_LIST: - for (uint32_t i = 0; i < idxcnt; i += 3) - { - const uint32_t tmp = idx[i + 1]; - idx[i + 1] = idx[i + 2]; - idx[i + 2] = tmp; - } - break; - default: break; - } - } + if (recomputeHash) + 
recomputeContentHashes(result.geom.get()); + return result.geom; } +#if 0 core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOptimized(const ICPUMeshBuffer* _inbuffer) { if (!_inbuffer) @@ -401,128 +307,6 @@ core::smart_refctd_ptr CMeshManipulator::createMeshBufferFetchOp return outbuffer; } -//! Creates a copy of the mesh, which will only consist of unique primitives -core::smart_refctd_ptr IMeshManipulator::createMeshBufferUniquePrimitives(ICPUMeshBuffer* inbuffer, bool _makeIndexBuf) -{ - if (!inbuffer) - return nullptr; - const ICPURenderpassIndependentPipeline* oldPipeline = inbuffer->getPipeline(); - if (!oldPipeline) - return nullptr; - - const uint32_t idxCnt = inbuffer->getIndexCount(); - if (idxCnt<2u || !inbuffer->getIndices()) - return core::smart_refctd_ptr(inbuffer); // yes we want an extra grab - - const auto& oldVtxParams = oldPipeline->getCachedCreationParams().vertexInput; - - auto clone = core::move_and_static_cast(inbuffer->clone(0u)); - - constexpr uint32_t NEW_VTX_BUF_BINDING = 0u; - - auto pipeline = core::smart_refctd_ptr_static_cast(oldPipeline->clone(0u)); - auto& vtxParams = pipeline->getCachedCreationParams().vertexInput; - vtxParams = SVertexInputParams(); - - vtxParams.enabledBindingFlags = (1u<getAttribBoundBuffer(i); - if (inbuffer->isAttributeEnabled(i) && vbuf.buffer) - { - offset[i] = stride; - newAttribSizes[i] = getTexelOrBlockBytesize(inbuffer->getAttribFormat(i)); - stride += newAttribSizes[i]; - if (stride>=0xdeadbeefu) - return nullptr; - - sourceBuffers[i] = reinterpret_cast(vbuf.buffer->getPointer()); - sourceBuffers[i] += inbuffer->getAttribCombinedOffset(i); - sourceBufferStrides[i] = inbuffer->getAttribStride(i); - } - else - offset[i] = -1; - } - - vtxParams.bindings[NEW_VTX_BUF_BINDING].inputRate = SVertexInputBindingParams::EVIR_PER_VERTEX; - vtxParams.bindings[NEW_VTX_BUF_BINDING].stride = stride; - - auto vertexBuffer = ICPUBuffer::create({ stride*idxCnt }); - clone->setVertexBufferBinding({0u, 
vertexBuffer}, 0u); - for (size_t i=0; i= 0) - { - vtxParams.attributes[i].binding = NEW_VTX_BUF_BINDING; - vtxParams.attributes[i].format = inbuffer->getAttribFormat(i); - vtxParams.attributes[i].relativeOffset = offset[i]; - } - } - - uint8_t* destPointer = reinterpret_cast(vertexBuffer->getPointer()); - if (inbuffer->getIndexType()==EIT_16BIT) - { - uint16_t* idx = reinterpret_cast(inbuffer->getIndices()); - for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); - destPointer += newAttribSizes[j]; - } - } - else if (inbuffer->getIndexType()==EIT_32BIT) - { - uint32_t* idx = reinterpret_cast(inbuffer->getIndices()); - for (uint64_t i=0; igetBaseVertex())*sourceBufferStrides[j],newAttribSizes[j]); - destPointer += newAttribSizes[j]; - } - } - - clone->setPipeline(std::move(pipeline)); - - if (_makeIndexBuf) - { - auto idxbuf = ICPUBuffer::create({ idxCnt*(idxCnt<0x10000 ? 2u : 4u) }); - if (idxCnt<0x10000u) - { - for (uint32_t i = 0u; i < idxCnt; ++i) - reinterpret_cast(idxbuf->getPointer())[i] = i; - clone->setIndexType(EIT_16BIT); - } - else - { - for (uint32_t i = 0u; i < idxCnt; ++i) - reinterpret_cast(idxbuf->getPointer())[i] = i; - clone->setIndexType(EIT_32BIT); - } - clone->setIndexBufferBinding({ 0u, std::move(idxbuf) }); - } - else - { - clone->setIndexType(EIT_UNKNOWN); - } - } - - return clone; -} core::smart_refctd_ptr IMeshManipulator::createOptimizedMeshBuffer(const ICPUMeshBuffer* _inbuffer, const SErrorMetric* _errMetric) { @@ -1358,78 +1142,5 @@ bool CMeshManipulator::calcMaxQuantizationError(const SAttribTypeChoice& _srcTyp return true; } - -core::smart_refctd_ptr IMeshManipulator::idxBufferFromLineStripsToLines(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - else - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - } - 
else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - else - return CMeshManipulator::lineStripsToLines(_input, _idxCount); - } - return nullptr; -} - -core::smart_refctd_ptr IMeshManipulator::idxBufferFromTriangleStripsToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - else - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - } - else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - else - return CMeshManipulator::triangleStripsToTriangles(_input, _idxCount); - } - return nullptr; -} - -core::smart_refctd_ptr IMeshManipulator::idxBufferFromTrianglesFanToTriangles(const void* _input, uint32_t& _idxCount, E_INDEX_TYPE _inIndexType, E_INDEX_TYPE _outIndexType) -{ - if (_inIndexType == EIT_16BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - else - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - } - else if (_inIndexType == EIT_32BIT) - { - if (_outIndexType == EIT_16BIT) - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - else - return CMeshManipulator::trianglesFanToTriangles(_input, _idxCount); - } - return nullptr; -} - -float IMeshManipulator::DistanceToLine(core::vectorSIMDf P0, core::vectorSIMDf P1, core::vectorSIMDf InPoint) -{ - core::vectorSIMDf PointToStart = InPoint - P0; - core::vectorSIMDf Diff = core::cross(P0 - P1, PointToStart); - - return core::dot(Diff, Diff).x; -} - -float IMeshManipulator::DistanceToPlane(core::vectorSIMDf InPoint, core::vectorSIMDf PlanePoint, core::vectorSIMDf PlaneNormal) -{ - core::vectorSIMDf PointToPlane = InPoint - 
PlanePoint; - - return (core::dot(PointToPlane, PlaneNormal).x >= 0) ? core::abs(core::dot(PointToPlane, PlaneNormal).x) : 0; -} - #endif } // end namespace nbl::asset From 63cc2690b95e3b41e6b50500117e6fe46074ca5d Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 01:15:35 +0100 Subject: [PATCH 04/54] all shapes in MitsubaLoader must be geometry collections actually use Geometry Transforms --- .../nbl/ext/MitsubaLoader/CMitsubaMetadata.h | 49 ++- include/nbl/ext/MitsubaLoader/SContext.h | 17 +- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 306 ++++++++---------- 3 files changed, 157 insertions(+), 215 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h index 9ffdd8f7fd..8805b7bc2f 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMetadata.h @@ -25,33 +25,21 @@ class CMitsubaMetadata : public asset::IAssetMetadata public: std::string m_id; }; - class IGeometry : public CID - { - public: - inline IGeometry() : CID(), type(CElementShape::Type::INVALID) {} - inline ~IGeometry() = default; - - CElementShape::Type type; - }; - class CPolygonGeometry final : public asset::IPolygonGeometryMetadata, public IGeometry + class CGeometryCollection final : public asset::IGeometryCollectionMetadata, public CID { public: - inline CPolygonGeometry() : asset::IPolygonGeometryMetadata(), IGeometry() {} - inline CPolygonGeometry(CPolygonGeometry&& other) : CPolygonGeometry() {operator=(std::move(other));} - inline ~CPolygonGeometry() = default; + inline CGeometryCollection() : asset::IGeometryCollectionMetadata(), CID(), type(CElementShape::Type::INVALID) {} + inline CGeometryCollection(CGeometryCollection&& other) : CGeometryCollection() {operator=(std::move(other));} + inline ~CGeometryCollection() = default; - inline CPolygonGeometry& operator=(CPolygonGeometry&& other) + inline CGeometryCollection& operator=(CGeometryCollection&& other) { - 
asset::IPolygonGeometryMetadata::operator=(std::move(other)); - IGeometry::operator=(std::move(other)); + asset::IGeometryCollectionMetadata::operator=(std::move(other)); + CID::operator=(std::move(other)); return *this; } - }; - class CGeometryCollection final : public asset::IGeometryCollectionMetadata, public CID - { - public: - inline CGeometryCollection() : asset::IGeometryCollectionMetadata(), CID() {} - inline ~CGeometryCollection() = default; + + CElementShape::Type type; }; struct SGlobal @@ -69,32 +57,33 @@ class CMitsubaMetadata : public asset::IAssetMetadata const char* getLoaderName() const override {return LoaderName;} // add more overloads when more asset implementations of IGeometry exist - inline const CPolygonGeometry* getAssetSpecificMetadata(const asset::ICPUPolygonGeometry* asset) const + inline const CGeometryCollection* getAssetSpecificMetadata(const asset::ICPUGeometryCollection* asset) const { const auto found = IAssetMetadata::getAssetSpecificMetadata(asset); - return static_cast(found); + return static_cast(found); } private: friend struct SContext; - struct SGeometryMetaPair + struct SGeometryCollectionMetaPair { - core::smart_refctd_ptr geom; - CMitsubaMetadata::CPolygonGeometry meta; + core::smart_refctd_ptr collection; + CMitsubaMetadata::CGeometryCollection meta; }; - inline void setPolygonGeometryMeta(core::unordered_map&& container) + template + inline void setGeometryCollectionMeta(core::unordered_map&& container) { const uint32_t count = container.size(); - m_metaPolygonGeometryStorage = IAssetMetadata::createContainer(count); + m_metaPolygonGeometryStorage = IAssetMetadata::createContainer(count); auto outIt = m_metaPolygonGeometryStorage->begin(); for (auto& el : container) { *outIt = std::move(el.second.meta); - IAssetMetadata::insertAssetSpecificMetadata(el.second.geom.get(),outIt++); + IAssetMetadata::insertAssetSpecificMetadata(el.second.collection.get(),outIt++); } } - meta_container_t m_metaPolygonGeometryStorage; + 
meta_container_t m_metaPolygonGeometryStorage; }; } diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index f3c952935c..29602a3600 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -19,25 +19,22 @@ struct SContext final { public: SContext( -// const asset::IGeometryCreator* _geomCreator, -// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* _metadata ); - using shape_ass_type = core::smart_refctd_ptr; + using shape_ass_type = core::smart_refctd_ptr; shape_ass_type loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape); - using group_ass_type = core::smart_refctd_ptr; - group_ass_type loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup); + // the `shape` will have to be `Type::SHAPEGROUP` + shape_ass_type loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape* shape); inline void transferMetadata() { - meta->setPolygonGeometryMeta(std::move(shapeCache)); + meta->setGeometryCollectionMeta(std::move(shapeCache)); + meta->setGeometryCollectionMeta(std::move(groupCache)); } -// const asset::IGeometryCreator* creator; -// const asset::IMeshManipulator* manipulator; const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; CMitsubaMetadata* meta; @@ -45,9 +42,9 @@ struct SContext final private: // - core::unordered_map groupCache; + core::unordered_map shapeCache; // - core::unordered_map shapeCache; + core::unordered_map groupCache; #if 0 // stuff that belongs in the Material Compiler backend //image, sampler diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 0151bc5578..94a5e08adb 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ 
b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -2,12 +2,14 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - -#include +#include "nbl/builtin/hlsl/math/linalg/basic.hlsl" +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include + #if 0 #include "nbl/asset/utils/CDerivativeMapCreator.h" @@ -21,7 +23,8 @@ namespace nbl { -using namespace asset; +using namespace nbl::asset; +using namespace nbl::hlsl; namespace ext::MitsubaLoader { @@ -210,8 +213,6 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: else { SContext ctx( -// m_assetMgr->getGeometryCreator(), -// m_assetMgr->getMeshManipulator(), IAssetLoader::SAssetLoadContext{ IAssetLoader::SAssetLoadParams(_params.decryptionKeyLen,_params.decryptionKey,_params.cacheFlags,_params.loaderFlags,_params.logger,_file->getFileName().parent_path()), _file @@ -224,8 +225,8 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: // TODO: abstract/move away since many loaders will need to do this - core::unordered_map> morphTargetCache; - auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr + core::unordered_map> morphTargetCache; + auto createMorphTargets = [&_params,&morphTargetCache](core::smart_refctd_ptr&& collection)->core::smart_refctd_ptr { auto found = morphTargetCache.find(collection.get()); if (found!=morphTargetCache.end()) @@ -234,7 +235,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: if (targets) { morphTargetCache[collection.get()] = targets; - targets->getTargets()->push_back({.geoCollection=std::move(collection)}); + targets->getTargets()->push_back({.geoCollection=core::smart_refctd_ptr(const_cast(collection.get()))}); } return targets; }; @@ -242,9 +243,14 @@ 
SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: // auto& instances = ctx.scene->getInstances(); instances.reserve(result.shapegroups.size()); - auto addToScene = [&](const CElementShape* shape, core::smart_refctd_ptr&& collection)->void + auto addToScene = [&](const CElementShape* shape, core::smart_refctd_ptr&& collection)->void { - assert(shape && collection); + if (!collection) + { + _params.logger.log("Failed to load a ICPUGeometryCollection for Shape with id %s",LoggerError,shape->id.c_str()); + return; + } + assert(shape); auto targets = createMorphTargets(std::move(collection)); if (!targets) { @@ -253,7 +259,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: } const auto index = instances.size(); instances.resize(index+1); - instances.getMorphTargets()[index] = std::move(targets); + instances.getMorphTargets()[index] = core::smart_refctd_ptr(const_cast(targets.get())); // TODO: add materials (incl emission) to the instances /* auto emitter = shape->obtainEmitter(); @@ -269,6 +275,8 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: CElementEmitter{} // no backface emission ); */ + if (shape->transform.matrix[3]!=float32_t4(0,0,0,1)) + _params.logger.log("Shape with id %s has Non-Affine transformation matrix, last row is not 0,0,0,1!",system::ILogger::ELL_ERROR,shape->id.c_str()); instances.getInitialTransforms()[index] = shape->getTransform(); }; @@ -281,29 +289,14 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: continue; if (shapedef->type!=CElementShape::Type::INSTANCE) - { - auto geometry = ctx.loadBasicShape(_hierarchyLevel,shapedef); - if (!geometry) - continue; - auto collection = core::make_smart_refctd_ptr(); - if (!collection) - { - _params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",LoggerError,shapedef->id.c_str()); - continue; - } - // we don't put a transform on the 
geometry, because we want the transform on the instance - collection->getGeometries()->push_back({.geometry=std::move(geometry)}); - addToScene(shapedef,std::move(collection)); - } + addToScene(shapedef,ctx.loadBasicShape(_hierarchyLevel,shapedef)); else // mitsuba is weird and lists instances under a shapegroup instead of having instances reference the shapegroup { // get group reference const CElementShape* parent = shapedef->instance.parent; if (!parent) // we should probably assert this continue; - assert(parent->type==CElementShape::Type::SHAPEGROUP); - auto collection = ctx.loadShapeGroup(_hierarchyLevel,&parent->shapegroup); - addToScene(shapedef,std::move(collection)); + addToScene(shapedef,ctx.loadShapeGroup(_hierarchyLevel,parent)); } } result.shapegroups.clear(); @@ -621,12 +614,10 @@ inline core::smart_refctd_ptr CMitsubaLoader::createDS using namespace std::string_literals; SContext::SContext( -// const asset::IGeometryCreator* _geomCreator, -// const asset::IMeshManipulator* _manipulator, const asset::IAssetLoader::SAssetLoadContext& _ctx, asset::IAssetLoader::IAssetLoaderOverride* _override, CMitsubaMetadata* _metadata -) : /*creator(_geomCreator), manipulator(_manipulator),*/ inner(_ctx), override_(_override), meta(_metadata) +) : inner(_ctx), override_(_override), meta(_metadata) //,ir(core::make_smart_refctd_ptr()), frontend(this) { auto materialPool = material_compiler3::CTrueIR::create(); @@ -634,11 +625,13 @@ SContext::SContext( frontIR = material_compiler3::CFrontendIR::create(); } -auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup) -> SContext::group_ass_type +auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type { + assert(shape->type==CElementShape::Type::SHAPEGROUP); + const auto* const shapegroup = &shape->shapegroup; auto found = groupCache.find(shapegroup); if (found!=groupCache.end()) - return found->second; + return 
found->second.collection; auto collection = core::make_smart_refctd_ptr(); if (!collection) @@ -652,80 +645,54 @@ auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape auto child = children[i]; if (!child) continue; - + // shape groups cannot contain instances assert(child->type!=CElementShape::Type::INSTANCE); + + shape_ass_type nestedCollection; if (child->type!=CElementShape::Type::SHAPEGROUP) - { - auto geometry = loadBasicShape(hierarchyLevel,child); - if (geometry) - geometries->push_back({.transform=child->getTransform(),.geometry=std::move(geometry)}); - } + nestedCollection = loadBasicShape(hierarchyLevel,child); else - { - auto nestedCollection = loadShapeGroup(hierarchyLevel,&child->shapegroup); - if (!nestedCollection) - continue; - auto* nestedGeometries = nestedCollection->getGeometries(); - for (auto& ref : *nestedGeometries) - { - auto& newRef = geometries->emplace_back(std::move(ref)); - // thankfully because SHAPEGROUPS are not allowed to have transforms we don't need to rack them up - //if (newRef.hasTransform()) - // newRef.transform = hlsl::mul(thisTransform,newRef.transform); - //else - // newRef.transform = thisTransform; - } - } + nestedCollection = loadShapeGroup(hierarchyLevel,child); + if (!nestedCollection) + continue; + + // note that we flatten geometry collections, different children are their own collections we turn them into one mega-collection + const auto& nestedGeometries = nestedCollection->getGeometries(); + // thankfully because SHAPEGROUPS are not allowed to have transforms we don't need to rack them up + //if (newRef.hasTransform()) + // newRef.transform = hlsl::mul(thisTransform,newRef.transform); + //else + // newRef.transform = thisTransform; + geometries->insert(geometries->end(),nestedGeometries.begin(),nestedGeometries.end()); } - groupCache.insert({shapegroup,collection}); + CMitsubaMetadata::SGeometryCollectionMetaPair pair = {.collection=collection}; + pair.meta.m_id = shape->id; + 
pair.meta.type = shape->type; + groupCache.insert({shapegroup,std::move(pair)}); } return collection; } -#if 0 -static core::smart_refctd_ptr createMeshFromGeomCreatorReturnType(IGeometryCreator::return_type&& _data, asset::IAssetManager* _manager) -{ - //creating pipeline just to forward vtx and primitive params - auto pipeline = core::make_smart_refctd_ptr( - nullptr, nullptr, nullptr, //no layout nor shaders - _data.inputParams, - asset::SBlendParams(), - _data.assemblyParams, - asset::SRasterizationParams() - ); - - auto mb = core::make_smart_refctd_ptr( - nullptr, nullptr, - _data.bindings, std::move(_data.indexBuffer) - ); - mb->setIndexCount(_data.indexCount); - mb->setIndexType(_data.indexType); - mb->setBoundingBox(_data.bbox); - mb->setPipeline(std::move(pipeline)); - constexpr auto NORMAL_ATTRIBUTE = 3; - mb->setNormalAttributeIx(NORMAL_ATTRIBUTE); - - auto mesh = core::make_smart_refctd_ptr(); - mesh->getMeshBufferVector().push_back(std::move(mb)); - - return mesh; -} -#endif - auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type { auto found = shapeCache.find(shape); if (found!=shapeCache.end()) - return found->second.geom; + return found->second.collection; - core::smart_refctd_ptr geo; - auto exiter = core::makeRAIIExiter<>([&]()->void - { - if (geo) - return; - this->inner.params.logger.log("Failed to Load/Create Basic non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); - } - ); + auto collection = core::make_smart_refctd_ptr(); + if (!collection) + { + inner.params.logger.log("Failed to create an ICPUGeometryCollection non-Instanced Shape with id %s",LoggerError,shape->id.c_str()); + return nullptr; + } + // the geometry reference transform shall only contain an exceptional and optional relative transform like to make Builtin shapes like cubes, spheres, etc. 
of different sizes + // the whole shape (which is a geometry collection) has its own transform + auto* pGeometries = collection->getGeometries(); + auto addGeometry = [pGeometries](ICPUGeometryCollection::SGeometryReference&& ref)->void + { + if (ref) + pGeometries->push_back(std::move(ref)); + }; #if 0 constexpr uint32_t UV_ATTRIB_ID = 2u; @@ -775,62 +742,52 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape #endif bool flipNormals = false; bool faceNormals = false; - float maxSmoothAngle = hlsl::bit_cast(hlsl::numeric_limits::quiet_NaN); + float maxSmoothAngle = bit_cast(numeric_limits::quiet_NaN); + auto* const creator = override_->getGeometryCreator(); switch (shape->type) { -#if 0 + // TODO: cache the simple geos to not spam new objects ? + // FAR TODO: create some special non-poly geometries for procedural raycasts? case CElementShape::Type::CUBE: { - auto cubeData = ctx.creator->createCubeMesh(core::vector3df(2.f)); - - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCubeMesh(core::vector3df(2.f)), m_assetMgr); flipNormals = flipNormals!=shape->cube.flipNormals; + addGeometry({.geometry=creator->createCube(promote(2.f))}); break; } case CElementShape::Type::SPHERE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createSphereMesh(1.f,64u,64u), m_assetMgr); flipNormals = flipNormals!=shape->sphere.flipNormals; { - core::matrix3x4SIMD tform; - tform.setScale(core::vectorSIMDf(shape->sphere.radius,shape->sphere.radius,shape->sphere.radius)); - tform.setTranslation(shape->sphere.center); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(tform)); + auto tform = math::linalg::diagonal(shape->sphere.radius); + math::linalg::setTranslation(tform,shape->sphere.center); + addGeometry({.transform=tform,.geometry=creator->createSphere(1.f,64u,64u)}); } break; case CElementShape::Type::CYLINDER: + flipNormals = flipNormals!=shape->cylinder.flipNormals; { - auto diff 
= shape->cylinder.p0-shape->cylinder.p1; - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createCylinderMesh(1.f, 1.f, 64), m_assetMgr); - core::vectorSIMDf up(0.f); - float maxDot = diff[0]; - uint32_t index = 0u; - for (auto i = 1u; i < 3u; i++) - if (diff[i] < maxDot) - { - maxDot = diff[i]; - index = i; - } - up[index] = 1.f; - core::matrix3x4SIMD tform; - // mesh is left haded so transforming by LH matrix is fine (I hope but lets check later on) - core::matrix3x4SIMD::buildCameraLookAtMatrixLH(shape->cylinder.p0,shape->cylinder.p1,up).getInverse(tform); - core::matrix3x4SIMD scale; - scale.setScale(core::vectorSIMDf(shape->cylinder.radius,shape->cylinder.radius,core::length(diff).x)); - shape->transform.matrix = core::concatenateBFollowedByA(shape->transform.matrix,core::matrix4SIMD(core::concatenateBFollowedByA(tform,scale))); + // start off as transpose, so rows are columns + float32_t4x3 extra; + extra[2] = shape->cylinder.p1 - shape->cylinder.p0; + extra[3] = shape->cylinder.p0; + math::frisvad(normalize(extra[2]),extra[0],extra[1]); + for (auto i=0u; i<2u; i++) + { + assert(length(extra[i])==1.f); + extra[i] *= shape->cylinder.radius; + } + addGeometry({.transform=transpose(extra),.geometry=creator->createCylinder(1.f,1.f,64u)}); } - flipNormals = flipNormals!=shape->cylinder.flipNormals; break; case CElementShape::Type::RECTANGLE: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createRectangleMesh(core::vector2df_SIMD(1.f,1.f)), m_assetMgr); - flipNormals = flipNormals!=shape->rectangle.flipNormals; + flipNormals = flipNormals!=shape->cylinder.flipNormals; + addGeometry({.geometry=creator->createRectangle(promote(1.f))}); break; case CElementShape::Type::DISK: - mesh = createMeshFromGeomCreatorReturnType(ctx.creator->createDiskMesh(1.f,64u), m_assetMgr); - flipNormals = flipNormals!=shape->disk.flipNormals; + flipNormals = flipNormals!=shape->cylinder.flipNormals; + addGeometry({.geometry=creator->createDisk(1.f,64)}); break; -#endif 
-#if 0 case CElementShape::Type::OBJ: +#if 0 // TODO: Arek mesh = loadModel(shape->obj.filename); flipNormals = flipNormals!=shape->obj.flipNormals; faceNormals = shape->obj.faceNormals; @@ -898,70 +855,69 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape } } } +#endif break; case CElementShape::Type::SERIALIZED: - mesh = loadModel(shape->serialized.filename,shape->serialized.shapeIndex); +// mesh = loadModel(shape->serialized.filename,shape->serialized.shapeIndex); flipNormals = flipNormals!=shape->serialized.flipNormals; faceNormals = shape->serialized.faceNormals; maxSmoothAngle = shape->serialized.maxSmoothAngle; break; -#endif case CElementShape::Type::SHAPEGROUP: [[fallthrough]]; case CElementShape::Type::INSTANCE: - assert(false); + assert(false); // this shouldn't happen, our parser code shouldn't reach here break; default: // _NBL_DEBUG_BREAK_IF(true); break; } - // - if (geo) + // handle fail + if (pGeometries->empty()) { -#if 0 - // mesh including meshbuffers needs to be cloned because instance counts and base instances will be changed - if (!newMesh) - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - // flip normals if necessary - if (flipNormals) + inner.params.logger.log("Failed to Load/Create Basic non-Instanced Shape with id %s",system::ILogger::ELL_ERROR,shape->id.c_str()); + return nullptr; + } + + // recompute and flip normals if necessary + if (faceNormals || !std::isnan(maxSmoothAngle)) + { + for (auto& ref : *pGeometries) { - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - auto binding = meshbuffer->getIndexBufferBinding(); - binding.buffer = core::smart_refctd_ptr_static_cast(binding.buffer->clone(0u)); - meshbuffer->setIndexBufferBinding(std::move(binding)); - ctx.manipulator->flipSurfaces(meshbuffer.get()); - } + const float smoothAngleCos = cos(radians(maxSmoothAngle)); + + auto* const polyGeo = static_cast(ref.geometry.get()); + ref.geometry = 
CPolygonGeometryManipulator::createSmoothVertexNormal( + CPolygonGeometryManipulator::createUnweldedList(polyGeo,flipNormals,false).get(),false,0.f, // TODO: maybe enable welding based on `!faceNormals` later + [faceNormals,smoothAngleCos](const CPolygonGeometryManipulator::SSNGVertexData& v0, const CPolygonGeometryManipulator::SSNGVertexData& v1, const ICPUPolygonGeometry* buffer) + { + if (faceNormals) + return v0.index==v1.index; + else + return dot(v0.weightedNormal,v1.weightedNormal)*rsqrt(dot(v0.weightedNormal,v0.weightedNormal)*dot(v1.weightedNormal,v1.weightedNormal)) >= smoothAngleCos; + }, + true // rewelding or initial unweld mess with all vertex attributes and index buffers, so recompute every hash + ); } - // recompute normalis if necessary - if (faceNormals || !std::isnan(maxSmoothAngle)) - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - const float smoothAngleCos = cos(core::radians(maxSmoothAngle)); - - // TODO: make these mesh manipulator functions const-correct - auto newMeshBuffer = ctx.manipulator->createMeshBufferUniquePrimitives(meshbuffer.get()); - ctx.manipulator->filterInvalidTriangles(newMeshBuffer.get()); - ctx.manipulator->calculateSmoothNormals(newMeshBuffer.get(), false, 0.f, newMeshBuffer->getNormalAttributeIx(), - [&](const asset::IMeshManipulator::SSNGVertexData& a, const asset::IMeshManipulator::SSNGVertexData& b, asset::ICPUMeshBuffer* buffer) - { - if (faceNormals) - return a.indexOffset == b.indexOffset; - else - return core::dot(a.parentTriangleFaceNormal, b.parentTriangleFaceNormal).x >= smoothAngleCos; - }); - meshbuffer = std::move(newMeshBuffer); - } - IMeshManipulator::recalculateBoundingBox(newMesh.get()); - mesh = std::move(newMesh); -#endif - // cache and return - CMitsubaMetadata::SGeometryMetaPair geoMeta = {.geom=std::move(geo)}; - geoMeta.meta.m_id = shape->id; - geoMeta.meta.type = shape->type; - shapeCache.insert({shape,std::move(geoMeta)}); } - return geo; + else if (flipNormals) + { + for (auto& ref : 
*pGeometries) + { + auto* const polyGeo = static_cast(ref.geometry.get()); + auto flippedGeo = CPolygonGeometryManipulator::createTriangleListIndexing(polyGeo,true,false); + CGeometryManipulator::recomputeContentHash(flippedGeo->getIndexView()); + // TODO: don't we also need to flip the normal buffer values? changing the winding doesn't help because the normals weren't recomputed ! + ref.geometry = std::move(flippedGeo); + } + } + + // cache and return + CMitsubaMetadata::SGeometryCollectionMetaPair pair = {.collection=collection}; + pair.meta.m_id = shape->id; + pair.meta.type = shape->type; + shapeCache.insert({shape,std::move(pair)}); + return collection; } } From a53f301886302f09e1585fda4a81800317d1f714 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 01:57:20 +0100 Subject: [PATCH 05/54] showcase the solution to `IAssetLoader::ELPF_RIGHT_HANDED_MESHES` deprecation --- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 139 +++++++++---------- 1 file changed, 67 insertions(+), 72 deletions(-) diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 94a5e08adb..5e8bbca5eb 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -694,52 +694,74 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape pGeometries->push_back(std::move(ref)); }; -#if 0 - constexpr uint32_t UV_ATTRIB_ID = 2u; - - - auto loadModel = [&](const ext::MitsubaLoader::SPropertyElementData& filename, int64_t index=-1) -> core::smart_refctd_ptr + auto loadModel = [&](const char* filename, int64_t index=-1) -> void { - assert(filename.type==ext::MitsubaLoader::SPropertyElementData::Type::STRING); - auto loadParams = ctx.inner.params; - loadParams.loaderFlags = static_cast(loadParams.loaderFlags | IAssetLoader::ELPF_RIGHT_HANDED_MESHES); - auto retval = interm_getAssetInHierarchy( filename.svalue, loadParams, 
hierarchyLevel/*+ICPUScene::MESH_HIERARCHY_LEVELS_BELOW*/, ctx.override_); +#if 0 + auto retval = interm_getAssetInHierarchy(filename,inner.params,hierarchyLevel+/*ICPUScene::GEOMETRY_COLLECTION_HIERARCHY_LEVELS_BELOW*/1,override_); if (retval.getContents().empty()) { os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); - return nullptr; - } - if (retval.getAssetType()!=asset::IAsset::ET_MESH) - { - os::Printer::log("[ERROR] Loaded an Asset but it wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); - return nullptr; + return; } - auto contentRange = retval.getContents(); - auto serializedMeta = retval.getMetadata()->selfCast(); - // + uint32_t actualIndex = 0; - if (index>=0ll && serializedMeta) - for (auto it=contentRange.begin(); it!=contentRange.end(); it++) + switch (retval.getAssetType()) { - auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); - if (meshMeta->m_id!=static_cast(index)) - continue; - actualIndex = it-contentRange.begin(); - break; + case IAsset::ET_GEOMETRY: + { + auto contentRange = retval.getContents(); + auto serializedMeta = retval.getMetadata()->selfCast(); + // + if (index>=0ll && serializedMeta) + for (auto it=contentRange.begin(); it!=contentRange.end(); it++) + { + auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); + if (meshMeta->m_id!=static_cast(index)) + continue; + actualIndex = it-contentRange.begin(); + break; + } + // + if (contentRange.begin()+actualIndex < contentRange.end()) + { + auto asset = contentRange.begin()[actualIndex]; + if (!asset) + { + return; + } + addGeometry(asset); + } + } + case IAsset::ET_GEOMETRY_COLLECTION: + { + // TODO: replace the collection + break; + } + case IAsset::ET_MORPH_TARGETS: + { + // TODO: take first target and replace the collection + _NBL_DEBUG_BREAK_IF(true); // we have no such loaders right now + break; + } + case 
IAsset::ET_SCENE: + { + // TODO: flatten the scene into a single instance, this is path for OBJ loading + // NOTE: also need to preserve/forward the materials somehow (need to chape the `shape_ass_type` to have a default Material Binding Table) + } + default: + os::Printer::log("[ERROR] Loaded an Asset but it wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); + break; } - // - if (contentRange.begin()+actualIndex < contentRange.end()) +#endif + // we used to load with the IAssetLoader::ELPF_RIGHT_HANDED_MESHES flag, this means flipping the mesh x-axis + for (auto& ref : *pGeometries) { - auto asset = contentRange.begin()[actualIndex]; - if (!asset) - return nullptr; - return core::smart_refctd_ptr_static_cast(asset); + ref.transform = math::linalg::diagonal(1.f); + ref.transform[0][0] = -1.f; } - else - return nullptr; }; -#endif + bool flipNormals = false; bool faceNormals = false; float maxSmoothAngle = bit_cast(numeric_limits::quiet_NaN); @@ -792,8 +814,10 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape flipNormals = flipNormals!=shape->obj.flipNormals; faceNormals = shape->obj.faceNormals; maxSmoothAngle = shape->obj.maxSmoothAngle; - if (mesh && shape->obj.flipTexCoords) + if (!pGeometries->empty() && shape->obj.flipTexCoords) { + _NBL_DEBUG_BREAK_IF(true); + // TODO: find the UV attribute, it doesn't help we don't name them newMesh = core::smart_refctd_ptr_static_cast (mesh->clone(1u)); for (auto& meshbuffer : mesh->getMeshBufferVector()) { @@ -811,54 +835,25 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape } } } +#endif // collapse parameter gets ignored break; case CElementShape::Type::PLY: _NBL_DEBUG_BREAK_IF(true); // this code has never been tested - mesh = loadModel(shape->ply.filename); + loadModel(shape->ply.filename); flipNormals = flipNormals!=shape->ply.flipNormals; faceNormals = shape->ply.faceNormals; maxSmoothAngle = 
shape->ply.maxSmoothAngle; - if (mesh && shape->ply.srgb) + if (shape->ply.srgb) + for (auto& ref : *pGeometries) { - uint32_t totalVertexCount = 0u; - for (auto meshbuffer : mesh->getMeshBuffers()) - totalVertexCount += IMeshManipulator::upperBoundVertexID(meshbuffer); - if (totalVertexCount) - { - constexpr uint32_t hidefRGBSize = 4u; - auto newRGBbuff = core::make_smart_refctd_ptr(hidefRGBSize*totalVertexCount); - newMesh = core::smart_refctd_ptr_static_cast(mesh->clone(1u)); - constexpr uint32_t COLOR_ATTR = 1u; - constexpr uint32_t COLOR_BUF_BINDING = 15u; - uint32_t* newRGB = reinterpret_cast(newRGBbuff->getPointer()); - uint32_t offset = 0u; - for (auto& meshbuffer : mesh->getMeshBufferVector()) - { - core::vectorSIMDf rgb; - for (uint32_t i=0u; meshbuffer->getAttribute(rgb,COLOR_ATTR,i); i++,offset++) - { - for (auto i=0; i<3u; i++) - rgb[i] = core::srgb2lin(rgb[i]); - ICPUMeshBuffer::setAttribute(rgb,newRGB+offset,asset::EF_A2B10G10R10_UNORM_PACK32); - } - auto newPipeline = core::smart_refctd_ptr_static_cast(meshbuffer->getPipeline()->clone(0u)); - auto& vtxParams = newPipeline->getVertexInputParams(); - vtxParams.attributes[COLOR_ATTR].format = EF_A2B10G10R10_UNORM_PACK32; - vtxParams.attributes[COLOR_ATTR].relativeOffset = 0u; - vtxParams.attributes[COLOR_ATTR].binding = COLOR_BUF_BINDING; - vtxParams.bindings[COLOR_BUF_BINDING].inputRate = EVIR_PER_VERTEX; - vtxParams.bindings[COLOR_BUF_BINDING].stride = hidefRGBSize; - vtxParams.enabledBindingFlags |= (1u<setPipeline(std::move(newPipeline)); - meshbuffer->setVertexBufferBinding({offset*hidefRGBSize,core::smart_refctd_ptr(newRGBbuff)},COLOR_BUF_BINDING); - } - } + // TODO: find the color attribute (it doesn't help we don't name them, just slap them in vectors) + // TODO: clone geometry + // TODO: change the color aux attribute's format from UNORM8 to SRGB } -#endif break; case CElementShape::Type::SERIALIZED: -// mesh = loadModel(shape->serialized.filename,shape->serialized.shapeIndex); + 
loadModel(shape->serialized.filename,shape->serialized.shapeIndex); flipNormals = flipNormals!=shape->serialized.flipNormals; faceNormals = shape->serialized.faceNormals; maxSmoothAngle = shape->serialized.maxSmoothAngle; From dada0d38b2dd3d33f5172ad321418b29114b5fe0 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 19 Feb 2026 11:07:37 +0400 Subject: [PATCH 06/54] Merge Fixes for Geotex streaming#952 (#1002) * Fix so that code can run in examples branch * update examples * update examples * ieee754::flipSign update, flipSign_helper::FloatingPoint fix * Change examples submodule * Update submodule pointer * Update CMakePresets.json, forgot to commit and push new n4ce preset * update examples * asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_NONE for NSC * add debug info flags to ILogicalDevice::compileShader * Point submodule to geotex branch * IUtilities Allow for null (0 sized) staging buffers for download/upload * Cmake "NBL_SKIP_BUILD_OPTIONS_VALIDATION": "ON" * Small MonoDeviceApp fix and update examples * update examples * Update Vulkan-Headers to 1.4.332 * update 3rdparty/Vulkan-Tools] * update smoke test to cover n4ce case, update 3rdparty/SPIRV-Tools submodule * add include/nbl/system/json.h and wipe nlohmann/json_fwd.hpp from public header interface * fix json ambiguity, update examples_tests submodule * update examples * update examples * update merged examples * update examples * Change submodule pointer to point to tgeotex streaming * update merged examples * after merge fixes * Update examples_tests pointer for CAD fix * Shorten resource archive output path * update examples * Split module load paths for build and relocatable package * Prioritize env and relative paths in relocatable package mode * update examples * update examples tests --------- Co-authored-by: Fletterio Co-authored-by: Arkadiusz Lachowicz Co-authored-by: Arkadiusz Lachowicz <34793522+AnastaZIuk@users.noreply.github.com> --- 3rdparty/CMakeLists.txt | 3 - 3rdparty/Vulkan-Headers | 2 
+- 3rdparty/Vulkan-Tools | 2 +- CMakePresets.json | 27 +- cmake/common.cmake | 6 +- examples_tests | 2 +- .../MonoDeviceApplication.hpp | 3 +- include/nbl/asset/utils/IShaderCompiler.h | 18 +- include/nbl/builtin/hlsl/ieee754.hlsl | 4 +- include/nbl/system/IApplicationFramework.h | 14 +- include/nbl/system/json.h | 15 ++ include/nbl/video/ILogicalDevice.h | 1 + include/nbl/video/utilities/IUtilities.h | 18 +- smoke/CMakeLists.txt | 1 + smoke/main.cpp | 67 ++++- src/nbl/CMakeLists.txt | 1 + src/nbl/asset/utils/IShaderCompiler.cpp | 249 +++++++++++++++++- .../utils/shaderCompiler_serialization.h | 196 -------------- src/nbl/video/ILogicalDevice.cpp | 4 +- src/nbl/video/utilities/IUtilities.cpp | 10 + tools/nsc/main.cpp | 6 +- 21 files changed, 406 insertions(+), 243 deletions(-) create mode 100644 include/nbl/system/json.h delete mode 100644 src/nbl/asset/utils/shaderCompiler_serialization.h diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 705158b15a..68e821dfdf 100755 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -530,9 +530,6 @@ nbl_install_dir(imath/src/Imath) nbl_install_file(blake/c/blake3.h) -nbl_install_file_spec(nlohmann_json/include/nlohmann/json_fwd.hpp nlohmann) -nbl_install_file_spec(nlohmann_json/include/nlohmann/detail/abi_macros.hpp nlohmann/detail) - nbl_install_dir(boost/superproject/libs/preprocessor/include/boost) nbl_install_file_spec(renderdoc/renderdoc_app.h renderdoc) diff --git a/3rdparty/Vulkan-Headers b/3rdparty/Vulkan-Headers index 33d7f51258..3dda5a1a87 160000 --- a/3rdparty/Vulkan-Headers +++ b/3rdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 33d7f512583b8de44d1b6384aa1cf482f92e53e9 +Subproject commit 3dda5a1a87b62fdf3baf4680edc41c00e85a7a22 diff --git a/3rdparty/Vulkan-Tools b/3rdparty/Vulkan-Tools index 761e7bf273..4b6f7101c1 160000 --- a/3rdparty/Vulkan-Tools +++ b/3rdparty/Vulkan-Tools @@ -1 +1 @@ -Subproject commit 761e7bf2736f3ad326fdfc1b3c1543f4e669fd5c +Subproject commit 
4b6f7101c15e09a8931f2f81c97146d0dfe68bc5 diff --git a/CMakePresets.json b/CMakePresets.json index 3117e607ac..88f093ac8f 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -295,7 +295,32 @@ "CMAKE_BUILD_TYPE": "Debug", "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" } - } + }, + { + "name": "n4ce-configure-windows-msvc", + "hidden": false, + "binaryDir": "build/production/n4ce", + "cacheVariables": { + "NBL_STATIC_BUILD": "OFF", + "NBL_SKIP_BUILD_OPTIONS_VALIDATION": "ON", + "CMAKE_SUPPRESS_REGENERATION": "OFF", + "NBL_COMPILER_DYNAMIC_RUNTIME": "ON", + "NBL_EMBED_BUILTIN_RESOURCES": "ON", + "NBL_UPDATE_GIT_SUBMODULE": "OFF", + "NBL_COMPILE_WITH_CUDA": "OFF", + "NBL_BUILD_OPTIX": "OFF", + "NBL_BUILD_MITSUBA_LOADER": "OFF", + "NBL_BUILD_RADEON_RAYS": "OFF", + "_NBL_COMPILE_WITH_OPEN_EXR_": "ON", + "NBL_EXPLICIT_MODULE_LOAD_LOG": "ON", + "NBL_CPACK_NO_BUILD_DIRECTORY_MODULES": "ON", + "GIT_FAIL_IF_NONZERO_EXIT": "OFF" + }, + "displayName": "[N4CE]: Dynamic library target, Visual Studio 17 2022 generator, MSVC v143 toolset", + "description": "Configure as dynamic library with Visual Studio 17 2022 generator and MSVC v143 toolset", + "generator": "Visual Studio 17 2022", + "toolset": "v143" + } ], "buildPresets": [ { diff --git a/cmake/common.cmake b/cmake/common.cmake index 6a3b5ec990..48a4098d97 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1656,7 +1656,11 @@ function(NBL_CREATE_RESOURCE_ARCHIVE) return() endif() - set(IMPL_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${IMPL_TARGET}") + if(DEFINED NBL_ROOT_PATH_BINARY AND NBL_ROOT_PATH_BINARY) + set(IMPL_OUTPUT_DIRECTORY "${NBL_ROOT_PATH_BINARY}/${IMPL_TARGET}") + else() + set(IMPL_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${IMPL_TARGET}") + endif() set(_BUNDLE_ARCHIVE_ABSOLUTE_PATH_ "") get_filename_component(_BUNDLE_SEARCH_DIRECTORY_ "${IMPL_BIND}" ABSOLUTE) diff --git a/examples_tests b/examples_tests index 655aa991e9..02c94ed67f 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ 
-Subproject commit 655aa991e96c8e1466d3c61c16f0d12fa36e86df +Subproject commit 02c94ed67f500188fbfd1fcb4a9b9b18eb426926 diff --git a/include/nbl/application_templates/MonoDeviceApplication.hpp b/include/nbl/application_templates/MonoDeviceApplication.hpp index a3a169d7b7..c7a94fe332 100644 --- a/include/nbl/application_templates/MonoDeviceApplication.hpp +++ b/include/nbl/application_templates/MonoDeviceApplication.hpp @@ -24,7 +24,8 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication virtual bool onAppTerminated() override { // break the circular references from queues tracking submit resources - m_device->waitIdle(); + if (m_device) + m_device->waitIdle(); m_device = nullptr; m_api = nullptr; return base_t::onAppTerminated(); diff --git a/include/nbl/asset/utils/IShaderCompiler.h b/include/nbl/asset/utils/IShaderCompiler.h index 9fd4eee833..0c24c2b1d0 100644 --- a/include/nbl/asset/utils/IShaderCompiler.h +++ b/include/nbl/asset/utils/IShaderCompiler.h @@ -12,9 +12,7 @@ #include "nbl/asset/IShader.h" #include "nbl/asset/utils/ISPIRVOptimizer.h" - -// Less leakage than "nlohmann/json.hpp" only forward declarations -#include "nlohmann/json_fwd.hpp" +#include "nbl/system/json.h" #include "nbl/builtin/hlsl/enums.hlsl" @@ -111,11 +109,10 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted // struct SMacroDefinition { - friend void to_json(nlohmann::json&, const SMacroDefinition&); - friend void from_json(const nlohmann::json&, SMacroDefinition&); - std::string_view identifier; std::string_view definition; + + friend struct system::json::adl_serializer; }; // @@ -222,9 +219,8 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted inline bool isStandardInclude() const { return standardInclude; } private: - friend void to_json(nlohmann::json& j, const SEntry::SPreprocessingDependency& dependency); - friend void from_json(const nlohmann::json& j, SEntry::SPreprocessingDependency& dependency); friend class CCache; + 
friend struct system::json::adl_serializer; // path or identifier system::path requestingSourceDir = ""; @@ -258,8 +254,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted friend class SCompilerArgs; friend class SEntry; friend class CCache; - friend void to_json(nlohmann::json&, const SPreprocessorArgs&); - friend void from_json(const nlohmann::json&, SPreprocessorArgs&); + friend struct system::json::adl_serializer; // Default constructor needed for json serialization of SCompilerArgs SPreprocessorArgs() {}; @@ -301,8 +296,7 @@ class NBL_API2 IShaderCompiler : public core::IReferenceCounted private: friend class SEntry; friend class CCache; - friend void to_json(nlohmann::json&, const SCompilerArgs&); - friend void from_json(const nlohmann::json&, SCompilerArgs&); + friend struct system::json::adl_serializer; // Default constructor needed for json serialization of SEntry SCompilerArgs() {} diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl index af23d6f07d..0663d89c0b 100644 --- a/include/nbl/builtin/hlsl/ieee754.hlsl +++ b/include/nbl/builtin/hlsl/ieee754.hlsl @@ -261,8 +261,8 @@ struct flipSignIfRHSNegative_helper -NBL_CONSTEXPR_FUNC T flipSign(T val, U flip) +template +NBL_CONSTEXPR_FUNC T flipSign(T val, U flip = true) { return impl::flipSign_helper::__call(val, flip); } diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index 44d3de0427..30911e7092 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -114,11 +114,19 @@ class IApplicationFramework : public core::IReferenceCounted return true; }; - if (not load(module.dxc, { install.dxc, env.dxc, build.dxc, rel.dxc })) + #ifdef NBL_RELOCATABLE_PACKAGE + if (not load(module.dxc, { env.dxc, rel.dxc, install.dxc })) + #else + if (not load(module.dxc, { build.dxc })) + #endif return false; #ifdef _NBL_SHARED_BUILD_ - if (not load(module.nabla, { 
install.nabla, env.nabla, build.nabla, rel.nabla })) + #ifdef NBL_RELOCATABLE_PACKAGE + if (not load(module.nabla, { env.nabla, rel.nabla, install.nabla })) + #else + if (not load(module.nabla, { build.nabla })) + #endif return false; #endif @@ -234,4 +242,4 @@ class IApplicationFramework : public core::IReferenceCounted } #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/system/json.h b/include/nbl/system/json.h new file mode 100644 index 0000000000..1fdd0abf47 --- /dev/null +++ b/include/nbl/system/json.h @@ -0,0 +1,15 @@ +#ifndef _NBL_SYSTEM_JSON_H_INCLUDED_ +#define _NBL_SYSTEM_JSON_H_INCLUDED_ + +namespace nbl::system::json { + template struct adl_serializer; +} + +#define NBL_JSON_IMPL_BIND_ADL_SERIALIZER(T) \ +namespace nlohmann { \ + template<> \ + struct adl_serializer \ + : T {}; \ +} + +#endif // _NBL_SYSTEM_JSON_H_INCLUDED_ \ No newline at end of file diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 180342e2d4..a269be082e 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -831,6 +831,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe asset::IShaderCompiler::CCache* writeCache = nullptr; std::span extraDefines = {}; hlsl::ShaderStage stage = hlsl::ShaderStage::ESS_ALL_OR_LIBRARY; + core::bitflag debugInfoFlags = asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_NONE; }; core::smart_refctd_ptr compileShader(const SShaderCreationParameters& creationParams); diff --git a/include/nbl/video/utilities/IUtilities.h b/include/nbl/video/utilities/IUtilities.h index 7817df8d23..f52d5d36ef 100644 --- a/include/nbl/video/utilities/IUtilities.h +++ b/include/nbl/video/utilities/IUtilities.h @@ -37,8 +37,10 @@ class NBL_API2 IUtilities : public core::IReferenceCounted , m_allocationAlignment(allocationAlignment) , m_allocationAlignmentForBufferImageCopy(allocationAlignmentForBufferImageCopy) { - 
m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); - m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); + if (m_defaultDownloadBuffer) + m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); + if (m_defaultUploadBuffer) + m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str()); } IUtilities() = delete; @@ -94,6 +96,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted core::smart_refctd_ptr > defaultDownloadBuffer = nullptr; // Try Create Download Buffer + if (downstreamSize > 0u) { IGPUBuffer::SCreationParams streamingBufferCreationParams = {}; streamingBufferCreationParams.size = downstreamSize; @@ -127,6 +130,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted defaultDownloadBuffer = core::make_smart_refctd_ptr>(asset::SBufferRange{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize); } // Try Create Upload Buffer + if (upstreamSize > 0u) { IGPUBuffer::SCreationParams streamingBufferCreationParams = {}; streamingBufferCreationParams.size = upstreamSize; @@ -374,6 +378,11 @@ class NBL_API2 IUtilities : public core::IReferenceCounted //! * data must not be nullptr inline bool updateBufferRangeViaStagingBuffer(SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange& bufferRange, IUpstreamingDataProducer& callback) { + if (!m_defaultUploadBuffer) + { + m_logger.log("no staging buffer available for upload. 
check `upstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (!bufferRange.isValid() || !bufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_DST_BIT)) { m_logger.log("Invalid `bufferRange` or buffer has no `EUF_TRANSFER_DST_BIT` usage flag, cannot `updateBufferRangeViaStagingBuffer`!", system::ILogger::ELL_ERROR); @@ -623,6 +632,11 @@ class NBL_API2 IUtilities : public core::IReferenceCounted template requires std::is_same_v, SIntendedSubmitInfo> inline bool downloadBufferRangeViaStagingBuffer(const std::function& consumeCallback, IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange& srcBufferRange) { + if (!m_defaultDownloadBuffer) + { + m_logger.log("no staging buffer available for download. check `downstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (!srcBufferRange.isValid() || !srcBufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_SRC_BIT)) { m_logger.log("Invalid `srcBufferRange` or buffer has no `EUF_TRANSFER_SRC_BIT` usage flag, cannot `downloadBufferRangeViaStagingBuffer`!",system::ILogger::ELL_ERROR); diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index a44374714d..c560e56a0f 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -28,6 +28,7 @@ find_package(Nabla REQUIRED CONFIG add_executable(smoke main.cpp pch.hpp cdb.ps1) target_link_libraries(smoke PRIVATE Nabla::Nabla) +target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) set(CMAKE_CTEST_ARGUMENTS --verbose) diff --git a/smoke/main.cpp b/smoke/main.cpp index 2b1863d404..530b29adae 100644 --- a/smoke/main.cpp +++ b/smoke/main.cpp @@ -1,3 +1,5 @@ +#include + #define ENABLE_SMOKE using namespace nbl; @@ -36,7 +38,25 @@ class Smoke final : public system::IApplicationFramework return false; } - exportGpuProfiles(); + if (!AfxWinInit(GetModuleHandle(nullptr), nullptr, 
GetCommandLineA(), 0)) + { + std::cerr << "[ERROR]: Could not init AFX, terminating!\n"; + return false; + } + + try { + createAfxDummyWindow(320, 240, nullptr, _T("Dummy 1")); + exportGpuProfiles(); + createAfxDummyWindow(320, 240, nullptr, _T("Dummy 2")); + } + catch (const std::exception& e) { + std::cerr << "[ERROR]: " << e.what() << '\n'; + return false; + } + catch (...) { + std::cerr << "[ERROR]: Unknown exception!\n"; + return false; + } return true; } @@ -44,37 +64,60 @@ class Smoke final : public system::IApplicationFramework void workLoopBody() override {} bool keepRunning() override { return false; } + bool onAppTerminated() override + { + AfxWinTerm(); + return true; + } + private: static void exportGpuProfiles() { - std::string arg2 = "-o"; - std::string buf; - std::string arg1; - std::string arg3; + std::string buf, arg1, arg2 = "-o", arg3; for (size_t i = 0;; i++) { - auto stringifiedIndex = std::to_string(i); - arg1 = "--json=" + stringifiedIndex; - arg3 = "device_" + stringifiedIndex + ".json"; - std::array args = { arg1.data(), arg2.data(), arg3.data() }; + auto six = std::to_string(i); + arg1 = "--json=" + six; + arg3 = "device_" + six + ".json"; + auto args = std::to_array({ arg1.data(), arg2.data(), arg3.data()}); int code = nbl::video::vulkaninfo(args); if (code != 0) break; - // print out file content std::ifstream input(arg3); while (std::getline(input, buf)) - { std::cout << buf << "\n"; - } std::cout << "\n\n"; } } + + static bool createAfxDummyWindow(int w, int h, HWND parent, LPCTSTR windowName) + { + CWnd wnd; + LPCTSTR cls = AfxRegisterWndClass(0, ::LoadCursor(nullptr, IDC_ARROW)); + if (!cls) return false; + + if (!wnd.CreateEx(0, cls, windowName, WS_POPUP | WS_VISIBLE, 0, 0, w, h, parent, nullptr)) + return false; + + MSG msg {}; + const ULONGLONG end = GetTickCount64() + 1000; + while (GetTickCount64() < end) { + while (PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE)) { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + Sleep(1); 
+ } + + wnd.DestroyWindow(); + return true; + } }; NBL_MAIN_FUNC(Smoke) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 53e45edd22..f69271e58c 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -805,6 +805,7 @@ target_include_directories(Nabla PUBLIC target_compile_definitions(Nabla INTERFACE "$" + "$" ) if(NBL_STATIC_BUILD) diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index a6cd95b441..3f5a3bab17 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/asset/utils/IShaderCompiler.h" #include "nbl/asset/utils/shadercUtils.h" -#include "nbl/asset/utils/shaderCompiler_serialization.h" #include #include @@ -13,10 +12,258 @@ #include #include +#include "nlohmann/json.hpp" + +using json = nlohmann::json; +using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; using namespace nbl; using namespace nbl::asset; +// -> serialization +// SMacroDefinition, simple container used in SPreprocessorArgs +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = IShaderCompiler::SMacroDefinition; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "identifier", p.identifier }, + { "definition", p.definition }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("identifier").get_to(p.identifier); + j.at("definition").get_to(p.definition); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SPreprocessorArgs, holds serialized info for Preprocessor options used during compilation +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SPreprocessorArgs; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "sourceIdentifier", p.sourceIdentifier }, + {
"extraDefines", p.extraDefines}, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("sourceIdentifier").get_to(p.sourceIdentifier); + j.at("extraDefines").get_to(p.extraDefines); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Optimizer pass has its own method for easier vector serialization +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = ISPIRVOptimizer::E_OPTIMIZER_PASS; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t value = static_cast(p); + j = ::json{ + { "optPass", value }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint32_t aux; + j.at("optPass").get_to(aux); + p = static_cast(aux); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SCompilerArgs, holds serialized info for all Compilation options +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SCompilerArgs; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t shaderStage = static_cast(p.stage); + uint32_t spirvVersion = static_cast(p.targetSpirvVersion); + uint32_t debugFlags = static_cast(p.debugInfoFlags.value); + + j = ::json{ + { "shaderStage", shaderStage }, + { "spirvVersion", spirvVersion }, + { "optimizerPasses", p.optimizerPasses }, + { "debugFlags", debugFlags }, + { "preprocessorArgs", p.preprocessorArgs }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint32_t shaderStage, spirvVersion, debugFlags; + j.at("shaderStage").get_to(shaderStage); + j.at("spirvVersion").get_to(spirvVersion); + j.at("optimizerPasses").get_to(p.optimizerPasses); + j.at("debugFlags").get_to(debugFlags); + j.at("preprocessorArgs").get_to(p.preprocessorArgs); + p.stage = static_cast(shaderStage); + p.targetSpirvVersion = static_cast(spirvVersion); + p.debugInfoFlags = core::bitflag(debugFlags); + } + }; +} 
+NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Serialize clock's time point +using time_point_t = nbl::system::IFileBase::time_point_t; +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = time_point_t; + + static inline void to_json(::json& j, const value_t& p) + { + auto ticks = p.time_since_epoch().count(); + j = ::json{ + { "ticks", ticks }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint64_t ticks; + j.at("ticks").get_to(ticks); + p = time_point_t(time_point_t::clock::duration(ticks)); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// SPreprocessingDependency serialization. Dependencies will be saved in a vector for easier serialization +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry::SPreprocessingDependency; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "requestingSourceDir", p.requestingSourceDir }, + { "identifier", p.identifier }, + { "hash", p.hash.data }, + { "standardInclude", p.standardInclude }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("requestingSourceDir").get_to(p.requestingSourceDir); + j.at("identifier").get_to(p.identifier); + j.at("hash").get_to(p.hash.data); + j.at("standardInclude").get_to(p.standardInclude); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized +struct CPUShaderCreationParams { + IShader::E_SHADER_STAGE stage; + std::string filepathHint; + uint64_t codeByteSize = 0; + uint64_t offset = 0; // Offset into the serialized .bin for the Cache where code starts + + CPUShaderCreationParams(IShader::E_SHADER_STAGE _stage, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) + : stage(_stage),
filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) {} + CPUShaderCreationParams() {}; +}; + +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = CPUShaderCreationParams; + + static inline void to_json(::json& j, const value_t& p) + { + uint32_t stage = static_cast(p.stage); + j = ::json{ + { "stage", stage }, + { "filepathHint", p.filepathHint }, + { "codeByteSize", p.codeByteSize }, + { "offset", p.offset }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + uint32_t stage; + j.at("stage").get_to(stage); + j.at("filepathHint").get_to(p.filepathHint); + j.at("codeByteSize").get_to(p.codeByteSize); + j.at("offset").get_to(p.offset); + p.stage = static_cast(stage); + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) + +// Serialize SEntry, keeping some fields as extra serialization to keep them separate on disk +namespace nbl::system::json { + template<> + struct adl_serializer + { + using value_t = SEntry; + + static inline void to_json(::json& j, const value_t& p) + { + j = ::json{ + { "mainFileContents", p.mainFileContents }, + { "compilerArgs", p.compilerArgs }, + { "hash", p.hash.data }, + { "lookupHash", p.lookupHash }, + { "dependencies", p.dependencies }, + { "uncompressedContentHash", p.uncompressedContentHash.data }, + { "uncompressedSize", p.uncompressedSize }, + }; + } + + static inline void from_json(const ::json& j, value_t& p) + { + j.at("mainFileContents").get_to(p.mainFileContents); + j.at("compilerArgs").get_to(p.compilerArgs); + j.at("hash").get_to(p.hash.data); + j.at("lookupHash").get_to(p.lookupHash); + j.at("dependencies").get_to(p.dependencies); + j.at("uncompressedContentHash").get_to(p.uncompressedContentHash.data); + j.at("uncompressedSize").get_to(p.uncompressedSize); + p.spirv = nullptr; + } + }; +} +NBL_JSON_IMPL_BIND_ADL_SERIALIZER(::nbl::system::json::adl_serializer) +// <- serialization + 
IShaderCompiler::IShaderCompiler(core::smart_refctd_ptr&& system) : m_system(std::move(system)) { diff --git a/src/nbl/asset/utils/shaderCompiler_serialization.h b/src/nbl/asset/utils/shaderCompiler_serialization.h deleted file mode 100644 index 6ad33a2ff5..0000000000 --- a/src/nbl/asset/utils/shaderCompiler_serialization.h +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef _NBL_ASSET_SHADER_COMPILER_SERIALIZATION_H_INCLUDED_ -#define _NBL_ASSET_SHADER_COMPILER_SERIALIZATION_H_INCLUDED_ - -#include "nbl/asset/utils/IShaderCompiler.h" -#include "nlohmann/json.hpp" - -using json = nlohmann::json; -using SEntry = nbl::asset::IShaderCompiler::CCache::SEntry; - - -namespace nbl::asset -{ - -// TODO: use NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE whenever possible - -// SMacroData, simple container used in SPreprocessorArgs - -inline void to_json(json& j, const IShaderCompiler::SMacroDefinition& macroData) -{ - j = json{ - { "identifier", macroData.identifier }, - { "definition", macroData.definition }, - }; -} - -inline void from_json(const json& j, IShaderCompiler::SMacroDefinition& macroData) -{ - j.at("identifier").get_to(macroData.identifier); - j.at("definition").get_to(macroData.definition); -} - -// SPreprocessorData, holds serialized info for Preprocessor options used during compilation -inline void to_json(json& j, const SEntry::SPreprocessorArgs& preprocArgs) -{ - j = json{ - { "sourceIdentifier", preprocArgs.sourceIdentifier }, - { "extraDefines", preprocArgs.extraDefines}, - }; -} - -inline void from_json(const json& j, SEntry::SPreprocessorArgs& preprocArgs) -{ - j.at("sourceIdentifier").get_to(preprocArgs.sourceIdentifier); - j.at("extraDefines").get_to(preprocArgs.extraDefines); -} - -// Optimizer pass has its own method for easier vector serialization - -inline void to_json(json& j, const ISPIRVOptimizer::E_OPTIMIZER_PASS& optPass) -{ - uint32_t value = static_cast(optPass); - j = json{ - { "optPass", value }, - }; -} - -inline void from_json(const json& j, 
ISPIRVOptimizer::E_OPTIMIZER_PASS& optPass) -{ - uint32_t aux; - j.at("optPass").get_to(aux); - optPass = static_cast(aux); -} - -// SCompilerArgs, holds serialized info for all Compilation options - -inline void to_json(json& j, const SEntry::SCompilerArgs& compilerData) -{ - uint32_t shaderStage = static_cast(compilerData.stage); - uint32_t spirvVersion = static_cast(compilerData.targetSpirvVersion); - uint32_t debugFlags = static_cast(compilerData.debugInfoFlags.value); - - j = json { - { "shaderStage", shaderStage }, - { "spirvVersion", spirvVersion }, - { "optimizerPasses", compilerData.optimizerPasses }, - { "debugFlags", debugFlags }, - { "preprocessorArgs", compilerData.preprocessorArgs }, - }; -} - -inline void from_json(const json& j, SEntry::SCompilerArgs& compilerData) -{ - uint32_t shaderStage, spirvVersion, debugFlags; - j.at("shaderStage").get_to(shaderStage); - j.at("spirvVersion").get_to(spirvVersion); - j.at("optimizerPasses").get_to(compilerData.optimizerPasses); - j.at("debugFlags").get_to(debugFlags); - j.at("preprocessorArgs").get_to(compilerData.preprocessorArgs); - compilerData.stage = static_cast(shaderStage); - compilerData.targetSpirvVersion = static_cast(spirvVersion); - compilerData.debugInfoFlags = core::bitflag(debugFlags); -} - -// Serialize clock's time point -using time_point_t = nbl::system::IFileBase::time_point_t; - -inline void to_json(json& j, const time_point_t& timePoint) -{ - auto ticks = timePoint.time_since_epoch().count(); - j = json{ - { "ticks", ticks }, - }; -} - -inline void from_json(const json& j, time_point_t& timePoint) -{ - uint64_t ticks; - j.at("ticks").get_to(ticks); - timePoint = time_point_t(time_point_t::clock::duration(ticks)); -} - -// SDependency serialization. 
Dependencies will be saved in a vector for easier vectorization - -inline void to_json(json& j, const SEntry::SPreprocessingDependency& dependency) -{ - j = json{ - { "requestingSourceDir", dependency.requestingSourceDir }, - { "identifier", dependency.identifier }, - { "hash", dependency.hash.data }, - { "standardInclude", dependency.standardInclude }, - }; -} - -inline void from_json(const json& j, SEntry::SPreprocessingDependency& dependency) -{ - j.at("requestingSourceDir").get_to(dependency.requestingSourceDir); - j.at("identifier").get_to(dependency.identifier); - j.at("hash").get_to(dependency.hash.data); - j.at("standardInclude").get_to(dependency.standardInclude); -} - -// We serialize shader creation parameters into a json, along with indexing info into the .bin buffer where the cache is serialized - -struct CPUShaderCreationParams { - IShader::E_SHADER_STAGE stage; - std::string filepathHint; - uint64_t codeByteSize = 0; - uint64_t offset = 0; // Offset into the serialized .bin for the Cache where code starts - - CPUShaderCreationParams(IShader::E_SHADER_STAGE _stage, std::string_view _filepathHint, uint64_t _codeByteSize, uint64_t _offset) - : stage(_stage), filepathHint(_filepathHint), codeByteSize(_codeByteSize), offset(_offset) - {} - - CPUShaderCreationParams() {}; -}; - -inline void to_json(json& j, const CPUShaderCreationParams& creationParams) -{ - uint32_t stage = static_cast(creationParams.stage); - j = json{ - { "stage", stage }, - { "filepathHint", creationParams.filepathHint }, - { "codeByteSize", creationParams.codeByteSize }, - { "offset", creationParams.offset }, - }; -} - -inline void from_json(const json& j, CPUShaderCreationParams& creationParams) -{ - uint32_t stage; - j.at("stage").get_to(stage); - j.at("filepathHint").get_to(creationParams.filepathHint); - j.at("codeByteSize").get_to(creationParams.codeByteSize); - j.at("offset").get_to(creationParams.offset); - creationParams.stage = static_cast(stage); -} - -// Serialize SEntry, 
keeping some fields as extra serialization to keep them separate on disk - -inline void to_json(json& j, const SEntry& entry) -{ - j = json{ - { "mainFileContents", entry.mainFileContents }, - { "compilerArgs", entry.compilerArgs }, - { "hash", entry.hash.data }, - { "lookupHash", entry.lookupHash }, - { "dependencies", entry.dependencies }, - { "uncompressedContentHash", entry.uncompressedContentHash.data }, - { "uncompressedSize", entry.uncompressedSize }, - }; -} - -inline void from_json(const json& j, SEntry& entry) -{ - j.at("mainFileContents").get_to(entry.mainFileContents); - j.at("compilerArgs").get_to(entry.compilerArgs); - j.at("hash").get_to(entry.hash.data); - j.at("lookupHash").get_to(entry.lookupHash); - j.at("dependencies").get_to(entry.dependencies); - j.at("uncompressedContentHash").get_to(entry.uncompressedContentHash.data); - j.at("uncompressedSize").get_to(entry.uncompressedSize); - entry.spirv = nullptr; -} - -} -#endif \ No newline at end of file diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index d5b38f9b69..a98deff5c7 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -360,9 +360,7 @@ core::smart_refctd_ptr ILogicalDevice::compileShader(const SShad commonCompileOptions.preprocessorOptions.extraDefines = creationParams.extraDefines; commonCompileOptions.stage = creationParams.stage; - commonCompileOptions.debugInfoFlags = - asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT | - asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_TOOL_BIT; + commonCompileOptions.debugInfoFlags = creationParams.debugInfoFlags; commonCompileOptions.spirvOptimizer = creationParams.optimizer; commonCompileOptions.preprocessorOptions.targetSpirvVersion = m_physicalDevice->getLimits().spirvVersion; diff --git a/src/nbl/video/utilities/IUtilities.cpp b/src/nbl/video/utilities/IUtilities.cpp index 46bda8a227..f6db104aa2 100644 --- a/src/nbl/video/utilities/IUtilities.cpp +++ 
b/src/nbl/video/utilities/IUtilities.cpp @@ -11,6 +11,11 @@ bool IUtilities::updateImageViaStagingBuffer( const std::span regions ) { + if (!m_defaultUploadBuffer) + { + m_logger.log("no staging buffer available for upload. check `upstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } auto* scratch = commonTransferValidation(intendedNextSubmit); if (!scratch) return false; @@ -164,6 +169,11 @@ bool IUtilities::downloadImageViaStagingBuffer( void* dest, const std::span regions ) { + if (!m_defaultDownloadBuffer) + { + m_logger.log("no staging buffer available for download. check `downstreamSize` passed to `IUtilities::create`",system::ILogger::ELL_ERROR); + return false; + } if (regions.empty()) return false; diff --git a/tools/nsc/main.cpp b/tools/nsc/main.cpp index 64ad684b0c..203aa6ce8c 100644 --- a/tools/nsc/main.cpp +++ b/tools/nsc/main.cpp @@ -16,7 +16,7 @@ #include "nbl/asset/metadata/CHLSLMetadata.h" #include "nlohmann/json.hpp" -using json = nlohmann::json; +using json = ::nlohmann::json; using namespace nbl; using namespace nbl::system; using namespace nbl::core; @@ -416,14 +416,14 @@ class ShaderCompiler final : public IApplicationFramework static void dumpBuildInfo(const argparse::ArgumentParser& program) { - json j; + ::json j; auto& modules = j["modules"]; auto serialize = [&](const gtml::GitInfo& info, std::string_view target) { auto& s = modules[target.data()]; s["isPopulated"] = info.isPopulated; - s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? json(info.hasUncommittedChanges.value()) : json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); + s["hasUncommittedChanges"] = info.hasUncommittedChanges.has_value() ? 
::json(info.hasUncommittedChanges.value()) : ::json("UNKNOWN, BUILT WITHOUT DIRTY-CHANGES CAPTURE"); s["commitAuthorName"] = info.commitAuthorName; s["commitAuthorEmail"] = info.commitAuthorEmail; s["commitHash"] = info.commitHash; From 7e0938fd1f7209347ff1cc6f6522a6e30570a508 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 10:08:20 +0100 Subject: [PATCH 07/54] Install FullScreenTriangle in relocatable package --- src/nbl/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index f69271e58c..b70d345fcf 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -788,6 +788,16 @@ start_tracking_variables_for_propagation_to_parent() add_subdirectory(ext EXCLUDE_FROM_ALL) propagate_changed_variables_to_parent_scope() +if(TARGET ${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}) + set_target_properties(${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB} PROPERTIES EXCLUDE_FROM_ALL OFF) + nbl_install_lib_spec(${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB} "nbl/ext/FULL_SCREEN_TRIANGLE") + target_link_libraries(Nabla INTERFACE + "$:$/debug/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_d.lib>>" + "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE.lib>>" + "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_rwdi.lib>>" + ) +endif() + nbl_install_headers("${NABLA_HEADERS_PUBLIC}") set_target_properties(Nabla PROPERTIES DEBUG_POSTFIX _debug) set_target_properties(Nabla PROPERTIES RELWITHDEBINFO_POSTFIX _relwithdebinfo) From 45228f0eadca95699a077025cfc45bd14c10b9c7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 11:15:03 +0100 Subject: [PATCH 08/54] Build Full Nabla In CI --- .github/workflows/build-nabla.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index f92ffa7545..a434c0da04 100644 --- a/.github/workflows/build-nabla.yml +++ 
b/.github/workflows/build-nabla.yml @@ -31,7 +31,7 @@ jobs: matrix: # vendor: [msvc, clangcl] # TODO: Yas please fix ClangCL, we have a few new compile errors - # if we build MSVC then build "run-compiler-explorer" target, for ClangCL build just "nsc" + # build full Nabla preset, run-compiler-explorer is pulled in via ALL when Docker integration is enabled vendor: [msvc] config: [Release, Debug, RelWithDebInfo] tag: ['17.13.6'] @@ -125,12 +125,12 @@ jobs: --profiling-output=profiling/cmake-profiling.json ` --profiling-format=google-trace - - name: Container – Build NSC + - name: Container – Build Nabla run: | docker exec orphan ` ${{ env.entry }} ${{ env.cmd }} -Command cmake --build ` --preset ci-build-dynamic-${{ matrix.vendor }} ` - -t run-compiler-explorer --config ${{ matrix.config }} + --config ${{ matrix.config }} - name: Container – Install Nabla run: | @@ -397,4 +397,4 @@ jobs: run: cmake --build smoke/out --config ${{ matrix.config }} - name: CTest Smoke - run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} \ No newline at end of file + run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} From e4c8bb0894928e4698fbda723461bf7dea2f4644 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 11:49:36 +0100 Subject: [PATCH 09/54] Export FullScreenTriangle Builtins Library --- src/nbl/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index b70d345fcf..c3c69fce19 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -796,6 +796,16 @@ if(TARGET ${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}) "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE.lib>>" "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_rwdi.lib>>" ) + + set(_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB 
"${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}_builtinsBuild") + if(NBL_EMBED_BUILTIN_RESOURCES AND TARGET ${_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB}) + nbl_install_lib_spec(${_NBL_EXT_FULL_SCREEN_TRIANGLE_BUILTINS_LIB} "nbl/ext/FULL_SCREEN_TRIANGLE") + target_link_libraries(Nabla INTERFACE + "$:$/debug/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild_d.lib>>" + "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild.lib>>" + "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild_rwdi.lib>>" + ) + endif() endif() nbl_install_headers("${NABLA_HEADERS_PUBLIC}") From d7980d79ff47f89b18c8ec43e58062c9f2091994 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 13:14:08 +0100 Subject: [PATCH 10/54] Build And Install Nabla In One CI Step --- .github/workflows/build-nabla.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index a434c0da04..704040514e 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -122,23 +122,18 @@ jobs: docker exec orphan ` ${{ env.entry }} ${{ env.cmd }} -Command cmake ` --preset ci-configure-dynamic-${{ matrix.vendor }} ` + -DCMAKE_INSTALL_PREFIX:PATH=C:/mount/nabla/build-ct/install ` --profiling-output=profiling/cmake-profiling.json ` --profiling-format=google-trace - - name: Container – Build Nabla + - name: Container – Build & Install Nabla run: | docker exec orphan ` ${{ env.entry }} ${{ env.cmd }} -Command cmake --build ` --preset ci-build-dynamic-${{ matrix.vendor }} ` + --target install ` --config ${{ matrix.config }} - - name: Container – Install Nabla - run: | - docker exec orphan ` - ${{ env.entry }} ${{ env.cmd }} -Command cmake --install ` - ${{ env.binary }} --config ${{ matrix.config }} ` - --prefix ${{ env.install }} - - name: API / Examples / Check Run (Create) id: check-run-create uses: 
actions/github-script@v6 From a66f3afa0c6ca34a2475179afd7b203414d78563 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 13:22:29 +0100 Subject: [PATCH 11/54] tiny fixes --- include/nbl/asset/ICPUMorphTargets.h | 1 + include/nbl/asset/interchange/IAssetLoader.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/nbl/asset/ICPUMorphTargets.h b/include/nbl/asset/ICPUMorphTargets.h index 29924f9727..e5d775e1af 100644 --- a/include/nbl/asset/ICPUMorphTargets.h +++ b/include/nbl/asset/ICPUMorphTargets.h @@ -46,6 +46,7 @@ class NBL_API2 ICPUMorphTargets : public IAsset, public IMorphTargets* getTargets() { if (isMutable()) diff --git a/include/nbl/asset/interchange/IAssetLoader.h b/include/nbl/asset/interchange/IAssetLoader.h index 14c84f400b..5354228278 100644 --- a/include/nbl/asset/interchange/IAssetLoader.h +++ b/include/nbl/asset/interchange/IAssetLoader.h @@ -327,6 +327,7 @@ class NBL_API2 IAssetLoader : public virtual core::IReferenceCounted protected: // accessors for loaders + // TODO: deprecate SAssetBundle interm_getAssetInHierarchy(system::IFile* _file, const std::string& _supposedFilename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); SAssetBundle interm_getAssetInHierarchy(const std::string& _filename, const IAssetLoader::SAssetLoadParams& _params, uint32_t _hierarchyLevel, IAssetLoader::IAssetLoaderOverride* _override); // only the overload we use for now From f2c60f4812041974fad34e8d9df507cd969ce2eb Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 13:22:49 +0100 Subject: [PATCH 12/54] hook up external loaders to Mitsuba XML --- include/nbl/ext/MitsubaLoader/SContext.h | 8 +- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 123 ++++++++++++------- 2 files changed, 83 insertions(+), 48 deletions(-) diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 29602a3600..3370ed5535 100644 --- 
a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -14,10 +14,13 @@ namespace nbl::ext::MitsubaLoader { +class CMitsubaLoader; struct SContext final { public: + using interm_getAssetInHierarchy_t = asset::SAssetBundle(const char*, const uint16_t); + SContext( const asset::IAssetLoader::SAssetLoadContext& _params, asset::IAssetLoader::IAssetLoaderOverride* _override, @@ -25,9 +28,9 @@ struct SContext final ); using shape_ass_type = core::smart_refctd_ptr; - shape_ass_type loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape); + shape_ass_type loadBasicShape(const CElementShape* shape); // the `shape` will have to be `Type::SHAPEGROUP` - shape_ass_type loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape* shape); + shape_ass_type loadShapeGroup(const CElementShape* shape); inline void transferMetadata() { @@ -37,6 +40,7 @@ struct SContext final const asset::IAssetLoader::SAssetLoadContext inner; asset::IAssetLoader::IAssetLoaderOverride* override_; + std::function interm_getAssetInHierarchy; CMitsubaMetadata* meta; core::smart_refctd_ptr scene; diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 5e8bbca5eb..8026235c1e 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -2,19 +2,18 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h + #include "nbl/builtin/hlsl/math/linalg/basic.hlsl" #include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" #include "nbl/ext/MitsubaLoader/ParserUtil.h" +#include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" #include -#if 0 -#include "nbl/asset/utils/CDerivativeMapCreator.h" +//#include "nbl/asset/utils/CDerivativeMapCreator.h" -#include "nbl/ext/MitsubaLoader/CMitsubaSerializedMetadata.h" -#endif #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) @@ -220,6 +219,10 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: _override, result.metadata.get() ); + ctx.interm_getAssetInHierarchy = [&](const char* filename, const uint16_t hierarchyOffset)->SAssetBundle + { + return this->interm_getAssetInHierarchy(filename,ctx.inner.params,_hierarchyLevel+hierarchyOffset,ctx.override_); + }; // ctx.scene->m_ambientLight = result.ambient; @@ -289,14 +292,14 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: continue; if (shapedef->type!=CElementShape::Type::INSTANCE) - addToScene(shapedef,ctx.loadBasicShape(_hierarchyLevel,shapedef)); + addToScene(shapedef,ctx.loadBasicShape(shapedef)); else // mitsuba is weird and lists instances under a shapegroup instead of having instances reference the shapegroup { // get group reference const CElementShape* parent = shapedef->instance.parent; if (!parent) // we should probably assert this continue; - addToScene(shapedef,ctx.loadShapeGroup(_hierarchyLevel,parent)); + addToScene(shapedef,ctx.loadShapeGroup(parent)); } } result.shapegroups.clear(); @@ -625,7 +628,7 @@ SContext::SContext( frontIR = material_compiler3::CFrontendIR::create(); } -auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type +auto SContext::loadShapeGroup(const CElementShape* shape) -> 
SContext::shape_ass_type { assert(shape->type==CElementShape::Type::SHAPEGROUP); const auto* const shapegroup = &shape->shapegroup; @@ -650,9 +653,9 @@ auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape shape_ass_type nestedCollection; if (child->type!=CElementShape::Type::SHAPEGROUP) - nestedCollection = loadBasicShape(hierarchyLevel,child); + nestedCollection = loadBasicShape(child); else - nestedCollection = loadShapeGroup(hierarchyLevel,child); + nestedCollection = loadShapeGroup(child); if (!nestedCollection) continue; @@ -673,7 +676,7 @@ auto SContext::loadShapeGroup(const uint32_t hierarchyLevel, const CElementShape return collection; } -auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape* shape) -> SContext::shape_ass_type +auto SContext::loadBasicShape(const CElementShape* shape) -> SContext::shape_ass_type { auto found = shapeCache.find(shape); if (found!=shapeCache.end()) @@ -697,69 +700,97 @@ auto SContext::loadBasicShape(const uint32_t hierarchyLevel, const CElementShape auto loadModel = [&](const char* filename, int64_t index=-1) -> void { -#if 0 - auto retval = interm_getAssetInHierarchy(filename,inner.params,hierarchyLevel+/*ICPUScene::GEOMETRY_COLLECTION_HIERARCHY_LEVELS_BELOW*/1,override_); - if (retval.getContents().empty()) + auto retval = interm_getAssetInHierarchy(filename,/*ICPUScene::GEOMETRY_COLLECTION_HIERARCHY_LEVELS_BELOW*/1); + auto contentRange = retval.getContents(); + if (contentRange.empty()) { - os::Printer::log(std::string("[ERROR] Could Not Find Mesh: ") + filename.svalue, ELL_ERROR); + inner.params.logger.log("Could Not Load Shape : %s",LoggerError,filename); return; } + + // we used to load with the IAssetLoader::ELPF_RIGHT_HANDED_MESHES flag, this means flipping the mesh x-axis + auto transform = math::linalg::diagonal(1.f); + transform[0][0] = -1.f; + + // + auto addCollectionGeometries = [&](const ICPUGeometryCollection* col)->void + { + if (col) + for (auto ref : 
col->getGeometries()) + { + if (ref.hasTransform()) + ref.transform = math::linalg::promoted_mul(ref.transform,transform); + else + ref.transform = transform; + addGeometry(std::move(ref)); + } + }; + + // take first target and replace the collection + auto addFirstTargetGeometries = [&](const ICPUMorphTargets* morph)->void + { + if (const auto& targets=morph->getTargets(); !targets.empty()) + addCollectionGeometries(targets.front().geoCollection.get()); + }; - uint32_t actualIndex = 0; switch (retval.getAssetType()) { case IAsset::ET_GEOMETRY: { - auto contentRange = retval.getContents(); + // only add one geometry, if we meant to add a whole collection, the file would load a collection + const IGeometry* geo = nullptr; auto serializedMeta = retval.getMetadata()->selfCast(); - // - if (index>=0ll && serializedMeta) for (auto it=contentRange.begin(); it!=contentRange.end(); it++) { - auto meshMeta = static_cast(serializedMeta->getAssetSpecificMetadata(IAsset::castDown(*it).get())); - if (meshMeta->m_id!=static_cast(index)) - continue; - actualIndex = it-contentRange.begin(); - break; - } - // - if (contentRange.begin()+actualIndex < contentRange.end()) - { - auto asset = contentRange.begin()[actualIndex]; - if (!asset) - { - return; - } - addGeometry(asset); + geo = IAsset::castDown(*it).get(); + assert(geo); + if (!serializedMeta || index<0ll || index>numeric_limits::max) // not Misuba serialized or shape index not specialized + break; + auto* const meta = serializedMeta->getAssetSpecificMetadata(static_cast(geo)); + assert(meta); + auto* const polygonMeta = static_cast(meta); + if (polygonMeta->m_id==static_cast(index)) + break; } + if (auto* const mg=const_cast*>(geo); mg) + addGeometry({.transform=transform,.geometry=core::smart_refctd_ptr>(mg)}); + break; } case IAsset::ET_GEOMETRY_COLLECTION: { - // TODO: replace the collection + // only add the first collection's geometries + addCollectionGeometries(IAsset::castDown(contentRange[0]).get()); break; } case 
IAsset::ET_MORPH_TARGETS: { - // TODO: take first target and replace the collection - _NBL_DEBUG_BREAK_IF(true); // we have no such loaders right now + addFirstTargetGeometries(IAsset::castDown(contentRange[0]).get()); break; } case IAsset::ET_SCENE: { - // TODO: flatten the scene into a single instance, this is path for OBJ loading - // NOTE: also need to preserve/forward the materials somehow (need to chape the `shape_ass_type` to have a default Material Binding Table) + // flatten the scene into a single instance, this is path for OBJ loading + const auto& instances = IAsset::castDown(contentRange[0])->getInstances(); + const auto instanceTforms = instances.getInitialTransforms(); + for (auto i=0u; isize(); + addFirstTargetGeometries(targets); + if (!instanceTforms.empty()) + for (auto geoIx=oldGeoBegin; geoIxsize(); geoIx++) + { + auto& ref = pGeometries->operator[](geoIx); + ref.transform = math::linalg::promoted_mul(instanceTforms[i],ref.transform); + } + // NOTE: also need to preserve/forward the materials somehow (need to chape the `shape_ass_type` to have a default Material Binding Table) + } + break; } default: - os::Printer::log("[ERROR] Loaded an Asset but it wasn't a mesh, was E_ASSET_TYPE " + std::to_string(retval.getAssetType()), ELL_ERROR); + inner.params.logger.log("Loaded an Asset but it didn't contain any geometry, was %s",LoggerError,system::to_string(retval.getAssetType())); break; } -#endif - // we used to load with the IAssetLoader::ELPF_RIGHT_HANDED_MESHES flag, this means flipping the mesh x-axis - for (auto& ref : *pGeometries) - { - ref.transform = math::linalg::diagonal(1.f); - ref.transform[0][0] = -1.f; - } }; bool flipNormals = false; From dc31aa0e4abd5dc0af95ef3f3d2443c53f039710 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 19 Feb 2026 13:26:18 +0100 Subject: [PATCH 13/54] post merge submodule update --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 
02c94ed67f..6ebb966161 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 02c94ed67f500188fbfd1fcb4a9b9b18eb426926 +Subproject commit 6ebb9661618c01e00498c43481f610586200885a From 03b5e34ee0a589c760a96aa2720900bd32840ea4 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Thu, 19 Feb 2026 15:24:06 +0100 Subject: [PATCH 14/54] Fix FullScreenTriangle Builtins RelWithDebInfo Name --- src/nbl/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index c3c69fce19..39d74994da 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -803,7 +803,7 @@ if(TARGET ${NBL_EXT_FULL_SCREEN_TRIANGLE_LIB}) target_link_libraries(Nabla INTERFACE "$:$/debug/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild_d.lib>>" "$:$/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild.lib>>" - "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild_rwdi.lib>>" + "$:$/relwithdebinfo/lib/nbl/ext/FULL_SCREEN_TRIANGLE/NblExtFULL_SCREEN_TRIANGLE_builtinsBuild.lib>>" ) endif() endif() From 86779cc611dec46c5aa3a342b23f9caae1345ed6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Fri, 20 Feb 2026 13:12:07 +0100 Subject: [PATCH 15/54] Fix NSC lookup mode for build and install runs --- cmake/common.cmake | 1 + include/nbl/system/IApplicationFramework.h | 56 +++++++++++++--------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index 48a4098d97..dbe30dc3d7 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1535,6 +1535,7 @@ namespace @IMPL_NAMESPACE@ { endif() set(NBL_NSC_COMPILE_COMMAND + "${CMAKE_COMMAND}" -E env "NBL_RUN_FROM_BUILD_INTERFACE=$<$>>:1>" "$" -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} diff --git a/include/nbl/system/IApplicationFramework.h 
b/include/nbl/system/IApplicationFramework.h index 30911e7092..f17859ebb2 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -26,22 +26,39 @@ class IApplicationFramework : public core::IReferenceCounted static bool GlobalsInit() { // TODO: update CMake and rename "DLL" in all of those defines here to "MODULE" or "RUNTIME" - - auto getEnvInstallDirectory = []() + auto resolveDir = [](const char* value) { - const char* sdk = std::getenv("NBL_INSTALL_DIRECTORY"); + if (!value || (value[0] == '\0')) + return system::path(""); - if (sdk) - { - const auto directory = system::path(sdk); - - if (std::filesystem::exists(directory)) - return directory; - } + const auto candidate = system::path(value); + if (std::filesystem::exists(candidate)) + return candidate; return system::path(""); }; + auto readEnvFlag = [](const char* key) + { + const char* value = std::getenv(key); + if (!value || (value[0] == '\0')) + return false; + + const std::string_view v(value); + return (v != "0") && (v != "false") && (v != "off") && (v != "no"); + }; + + const auto sdk = resolveDir(std::getenv("NBL_INSTALL_DIRECTORY")); + + #ifdef NBL_RELOCATABLE_PACKAGE + // Relocatable package consumers must use install lookups only. + const bool useInstallLookups = true; + #else + // Build-interface binaries select lookup mode at runtime via NBL_RUN_FROM_BUILD_INTERFACE. + // This is required because the same host-built executable can later be run from an install package. 
+ const bool useInstallLookups = !readEnvFlag("NBL_RUN_FROM_BUILD_INTERFACE"); + #endif // NBL_RELOCATABLE_PACKAGE + constexpr struct { std::string_view nabla, dxc; @@ -56,8 +73,6 @@ class IApplicationFramework : public core::IReferenceCounted "dxcompiler" }; - const auto sdk = getEnvInstallDirectory(); - struct { system::path nabla, dxc; @@ -70,6 +85,7 @@ class IApplicationFramework : public core::IReferenceCounted install.dxc = std::filesystem::absolute(system::path(_NABLA_INSTALL_DIR_) / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY); #endif + //! ABS key is full key to file inside relocatable package env.nabla = sdk / NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; env.dxc = sdk / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY; #endif @@ -83,6 +99,7 @@ class IApplicationFramework : public core::IReferenceCounted build.dxc = path(_DXC_DLL_).parent_path(); #endif + //! consumer can set this as relative path between exe & DLLs #ifdef NBL_CPACK_PACKAGE_NABLA_DLL_DIR rel.nabla = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; #endif @@ -91,7 +108,8 @@ class IApplicationFramework : public core::IReferenceCounted rel.dxc = NBL_CPACK_PACKAGE_DXC_DLL_DIR; #endif - auto load = [](std::string_view moduleName, const std::vector& searchPaths) + using RV = const std::vector; + auto load = [](std::string_view moduleName, const RV& searchPaths) { #ifdef _NBL_PLATFORM_WINDOWS_ const bool isAlreadyLoaded = GetModuleHandleA(moduleName.data()); @@ -114,19 +132,11 @@ class IApplicationFramework : public core::IReferenceCounted return true; }; - #ifdef NBL_RELOCATABLE_PACKAGE - if (not load(module.dxc, { env.dxc, rel.dxc, install.dxc })) - #else - if (not load(module.dxc, { build.dxc })) - #endif + if (not load(module.dxc, useInstallLookups ? 
RV{ rel.dxc, env.dxc, install.dxc } : RV{ build.dxc })) return false; #ifdef _NBL_SHARED_BUILD_ - #ifdef NBL_RELOCATABLE_PACKAGE - if (not load(module.nabla, { env.nabla, rel.nabla, install.nabla })) - #else - if (not load(module.nabla, { build.nabla })) - #endif + if (not load(module.nabla, useInstallLookups ? RV{ rel.nabla, env.nabla, install.nabla } : RV{ build.nabla })) return false; #endif From 0093d517a1aa40c4b438e59c70b931315479f30e Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 21:47:35 +0100 Subject: [PATCH 16/54] make more `operator bool()` explicit, notice a missing `_BIT` suffix in a BLAS flag, add a `CAssetConverter::SReserveResult::moveGPUObjects`, make `IPreHashed` helpers more const pointer friendly --- include/nbl/asset/IAccelerationStructure.h | 3 ++- include/nbl/asset/IGeometry.h | 4 ++-- include/nbl/asset/IMorphTargets.h | 4 ++-- include/nbl/asset/IPolygonGeometry.h | 2 +- include/nbl/asset/IPreHashed.h | 2 +- include/nbl/asset/IRenderpass.h | 2 +- include/nbl/video/IGPUPipeline.h | 2 +- include/nbl/video/ILogicalDevice.h | 2 +- include/nbl/video/SPipelineCreationParams.h | 2 +- include/nbl/video/utilities/CAssetConverter.h | 14 ++++++++++++-- include/nbl/video/utilities/CSmoothResizeSurface.h | 2 +- src/nbl/video/utilities/CAssetConverter.cpp | 2 +- 12 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 6d64a2b769..a1d325a2a3 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -76,7 +76,8 @@ class IBottomLevelAccelerationStructure : public IAccelerationStructure // Provided by VK_NV_displacement_micromap ALLOW_DISPLACEMENT_MICROMAP_UPDATE_BIT = 0x1u<<9u, // Provided by VK_KHR_ray_tracing_position_fetch - ALLOW_DATA_ACCESS = 0x1u<<11u, + ALLOW_DATA_ACCESS_BIT = 0x1u<<11u, + ALLOW_DATA_ACCESS = ALLOW_DATA_ACCESS_BIT // deprecated }; diff --git
a/include/nbl/asset/IGeometry.h b/include/nbl/asset/IGeometry.h index 2e76e4bde7..319c98d99e 100644 --- a/include/nbl/asset/IGeometry.h +++ b/include/nbl/asset/IGeometry.h @@ -176,7 +176,7 @@ class IGeometryBase : public virtual core::IReferenceCounted struct SDataViewBase { // mostly checking validity of the format - inline operator bool() const {return format==EF_UNKNOWN || !isBlockCompressionFormat(format) && !isDepthOrStencilFormat(format);} + explicit inline operator bool() const {return format==EF_UNKNOWN || !isBlockCompressionFormat(format) && !isDepthOrStencilFormat(format);} // inline bool isFormatted() const {return format!=EF_UNKNOWN && bool(*this);} @@ -294,7 +294,7 @@ class IGeometry : public std::conditional_t() const diff --git a/include/nbl/asset/IMorphTargets.h b/include/nbl/asset/IMorphTargets.h index 6f208c6f73..27c1bba5c5 100644 --- a/include/nbl/asset/IMorphTargets.h +++ b/include/nbl/asset/IMorphTargets.h @@ -22,7 +22,7 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted inline index_t() = default; explicit inline index_t(uint32_t _value) : value(_value) {} - inline operator bool() const {return value!=(~0u);} + explicit inline operator bool() const {return value!=(~0u);} uint32_t value = ~0u; }; @@ -68,7 +68,7 @@ class NBL_API2 IMorphTargets : public virtual core::IReferenceCounted struct STarget { - inline operator bool() const + explicit inline operator bool() const { if (!geoCollection) return false; diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index 023c9e462a..6597f19810 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h @@ -208,7 +208,7 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG struct SJointWeight { // one thing this doesn't check is whether every vertex has a weight and index - inline operator bool() const {return indices && isIntegerFormat(indices.composed.format) && weights && 
weights.composed.isFormatted() && indices.getElementCount()==weights.getElementCount();} + explicit inline operator bool() const {return indices && isIntegerFormat(indices.composed.format) && weights && weights.composed.isFormatted() && indices.getElementCount()==weights.getElementCount();} SDataView indices; // Assumption is that only non-zero weights are present, which is why the joints are indexed (sparseness) diff --git a/include/nbl/asset/IPreHashed.h b/include/nbl/asset/IPreHashed.h index f7252211e1..655c53656b 100644 --- a/include/nbl/asset/IPreHashed.h +++ b/include/nbl/asset/IPreHashed.h @@ -39,7 +39,7 @@ class IPreHashed : public IAsset discardContent_impl(); } - static inline void discardDependantsContents(const std::span roots) + static inline void discardDependantsContents(const std::span roots) { core::vector stack; core::unordered_set alreadyVisited; // whether we have push the node to the stack diff --git a/include/nbl/asset/IRenderpass.h b/include/nbl/asset/IRenderpass.h index ce41e35573..a46210f73e 100644 --- a/include/nbl/asset/IRenderpass.h +++ b/include/nbl/asset/IRenderpass.h @@ -302,7 +302,7 @@ class NBL_API2 IRenderpass uint32_t dependencyCount = 0u; int8_t viewMaskMSB = -1; - inline operator bool() const {return subpassCount;} + explicit inline operator bool() const {return subpassCount;} inline bool hasViewMasks() const {return viewMaskMSB>=0;} }; diff --git a/include/nbl/video/IGPUPipeline.h b/include/nbl/video/IGPUPipeline.h index c22ad998db..00a8f71414 100644 --- a/include/nbl/video/IGPUPipeline.h +++ b/include/nbl/video/IGPUPipeline.h @@ -90,7 +90,7 @@ class IGPUPipelineBase { .count = dataSize ? 
static_cast(count) : 0, .dataSize = static_cast(dataSize), }; - return *retval; + return bool(*retval); } const asset::IShader* shader = nullptr; diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index a269be082e..ae351fdecd 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -410,7 +410,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe // struct AccelerationStructureBuildSizes { - inline operator bool() const { return accelerationStructureSize!=(~0ull); } + explicit inline operator bool() const { return accelerationStructureSize!=(~0ull); } size_t accelerationStructureSize = ~0ull; size_t updateScratchSize = ~0ull; diff --git a/include/nbl/video/SPipelineCreationParams.h b/include/nbl/video/SPipelineCreationParams.h index 3a25560ae4..0971102599 100644 --- a/include/nbl/video/SPipelineCreationParams.h +++ b/include/nbl/video/SPipelineCreationParams.h @@ -14,7 +14,7 @@ namespace nbl::video struct SSpecializationValidationResult { constexpr static inline uint32_t Invalid = ~0u; - inline operator bool() const + explicit inline operator bool() const { return count!=Invalid && dataSize!=Invalid; } diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h index a360e3b0f5..0b47fa7229 100644 --- a/include/nbl/video/utilities/CAssetConverter.h +++ b/include/nbl/video/utilities/CAssetConverter.h @@ -1048,7 +1048,7 @@ class CAssetConverter : public core::IReferenceCounted } // - inline operator bool() const {return bool(m_converter);} + explicit inline operator bool() const {return bool(m_converter);} // Until `convert` is called, the Buffers and Images are not filled with content and Acceleration Structures are not built, unless found in the `SInput::readCache` // WARNING: The Acceleration Structure Pointer WILL CHANGE after calling `convert` if its patch dictates that it will be compacted! 
(since AS can't resize) @@ -1056,6 +1056,16 @@ class CAssetConverter : public core::IReferenceCounted template std::span> getGPUObjects() const {return std::get>(m_gpuObjects);} + // after a successful conversion you can move the GPU objects over + template + bool moveGPUObjects(vector_t& out) + { + if (m_converter) + return false; + out = std::move(std::get>(m_gpuObjects)); + return true; + } + // If you ever need to look up the content hashes of the assets AT THE TIME you converted them // REMEMBER it can have stale hashes (asset or its dependants mutated since hash computed), // then you can get hash mismatches or plain wrong hashes. @@ -1106,7 +1116,7 @@ class CAssetConverter : public core::IReferenceCounted // we don't insert into the writeCache until conversions are successful core::tuple_transform_t m_stagingCaches; - // converted IShaders do not have any object that hold a smartptr into them, so we have to persist them in this vector to prevent m_stagingCacheds hold a raw dangling pointer into them + // converted IShaders do not have any object that hold a smartptr into them, so we have to persist them in this vector to prevent m_stagingCacheds hold a raw dangling pointer into them core::vector> m_shaders; // need a more explicit list of GPU objects that need device-assisted conversion diff --git a/include/nbl/video/utilities/CSmoothResizeSurface.h b/include/nbl/video/utilities/CSmoothResizeSurface.h index 4d3a243b90..017f1cd2d4 100644 --- a/include/nbl/video/utilities/CSmoothResizeSurface.h +++ b/include/nbl/video/utilities/CSmoothResizeSurface.h @@ -107,7 +107,7 @@ class NBL_API2 ISmoothResizeSurface : public ISimpleManagedSurface struct SCachedPresentInfo { - inline operator bool() const {return source.image && waitSemaphore && waitValue && pPresentSemaphoreWaitValue;} + explicit inline operator bool() const {return source.image && waitSemaphore && waitValue && pPresentSemaphoreWaitValue;} SPresentSource source = {}; // only allow waiting for one 
semaphore, because there's only one source to present! diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d7f2d7dbbc..91d53eaf7c 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -4165,7 +4165,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul struct SMissingDependent { // This only checks if whether we had to convert and failed, but the dependent might be in readCache of one or more converters, so if in doubt assume its okay - inline operator bool() const {return wasInStaging && gotWiped;} + explicit inline operator bool() const {return wasInStaging && gotWiped;} bool wasInStaging; bool gotWiped; From ad362fc3253e0463f73a240b8c23d5dbda8bbbdb Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 23:05:12 +0100 Subject: [PATCH 17/54] went a bit overboard with previous commit, also fix infinite recursion bug due to bad const resolution --- include/nbl/asset/IAsset.h | 6 +- include/nbl/asset/IPreHashed.h | 106 ++++++++++---------- src/nbl/video/utilities/CAssetConverter.cpp | 2 +- 3 files changed, 58 insertions(+), 56 deletions(-) diff --git a/include/nbl/asset/IAsset.h b/include/nbl/asset/IAsset.h index b7142713bf..c6589cf043 100644 --- a/include/nbl/asset/IAsset.h +++ b/include/nbl/asset/IAsset.h @@ -169,9 +169,11 @@ class IAsset : virtual public core::IReferenceCounted inline void visitDependents(std::function visit) { assert(isMutable()); - visitDependents([&](const IAsset* dependent) -> bool + visitDependents_impl([&](const IAsset* dep) -> bool { - return visit(const_cast(dependent)); + if (dep) + return visit(const_cast(dep)); + return true; }); } diff --git a/include/nbl/asset/IPreHashed.h b/include/nbl/asset/IPreHashed.h index 655c53656b..50d9e71b21 100644 --- a/include/nbl/asset/IPreHashed.h +++ b/include/nbl/asset/IPreHashed.h @@ -39,61 +39,61 @@ class IPreHashed : public IAsset 
discardContent_impl(); } - static inline void discardDependantsContents(const std::span roots) - { - core::vector stack; - core::unordered_set alreadyVisited; // whether we have push the node to the stack - auto push = [&stack,&alreadyVisited](IAsset* node) -> bool - { - const auto [dummy,inserted] = alreadyVisited.insert(node); - if (inserted) - stack.push_back(node); - return true; - }; - for (const auto& root : roots) - push(root); - while (!stack.empty()) - { - auto* entry = stack.back(); - stack.pop_back(); - entry->visitDependents(push); - // pre order traversal does discard - auto* isPrehashed = dynamic_cast(entry); - if (isPrehashed) - isPrehashed->discardContent(); - } - } - static inline bool anyDependantDiscardedContents(const IAsset* root) - { - core::vector stack; - core::unordered_set alreadyVisited; // whether we have push the node to the stack - bool result = false; - auto push = [&stack,&alreadyVisited,&result](const IAsset* node) -> bool - { - const auto [dummy,inserted] = alreadyVisited.insert(node); - if (inserted) + static inline void discardDependantsContents(const std::span roots) { - auto* isPrehashed = dynamic_cast(node); - if (isPrehashed && isPrehashed->missingContent()) - { - stack.clear(); - result = true; - return false; - } - stack.push_back(node); + core::vector stack; + core::unordered_set alreadyVisited; // whether we have push the node to the stack + auto push = [&stack,&alreadyVisited](IAsset* node) -> bool + { + const auto [dummy,inserted] = alreadyVisited.insert(node); + if (inserted) + stack.push_back(node); + return true; + }; + for (const auto& root : roots) + push(root); + while (!stack.empty()) + { + auto* entry = stack.back(); + stack.pop_back(); + entry->visitDependents(push); + // pre order traversal does discard + auto* isPrehashed = dynamic_cast(entry); + if (isPrehashed) + isPrehashed->discardContent(); + } + } + static inline bool anyDependantDiscardedContents(const IAsset* root) + { + core::vector stack; + 
core::unordered_set alreadyVisited; // whether we have push the node to the stack + bool result = false; + auto push = [&stack,&alreadyVisited,&result](const IAsset* node) -> bool + { + const auto [dummy,inserted] = alreadyVisited.insert(node); + if (inserted) + { + auto* isPrehashed = dynamic_cast(node); + if (isPrehashed && isPrehashed->missingContent()) + { + stack.clear(); + result = true; + return false; + } + stack.push_back(node); + } + return true; + }; + if (!push(root)) + return true; + while (!stack.empty()) + { + auto* entry = stack.back(); + stack.pop_back(); + entry->visitDependents(push); + } + return result; } - return true; - }; - if (!push(root)) - return true; - while (!stack.empty()) - { - auto* entry = stack.back(); - stack.pop_back(); - entry->visitDependents(push); - } - return result; - } protected: inline IPreHashed() = default; diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 91d53eaf7c..d7f2d7dbbc 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -4165,7 +4165,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul struct SMissingDependent { // This only checks if whether we had to convert and failed, but the dependent might be in readCache of one or more converters, so if in doubt assume its okay - explicit inline operator bool() const {return wasInStaging && gotWiped;} + inline operator bool() const {return wasInStaging && gotWiped;} bool wasInStaging; bool gotWiped; From 08d3808297040ca7f51961fad63f4a3bf2bb3d93 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 23:10:35 +0100 Subject: [PATCH 18/54] nobody used ICPUTLAS's build ranges --- include/nbl/asset/ICPUAccelerationStructure.h | 42 ++++++++----------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/include/nbl/asset/ICPUAccelerationStructure.h b/include/nbl/asset/ICPUAccelerationStructure.h index 
4973d7c433..04f160459a 100644 --- a/include/nbl/asset/ICPUAccelerationStructure.h +++ b/include/nbl/asset/ICPUAccelerationStructure.h @@ -247,13 +247,16 @@ class ICPUBottomLevelAccelerationStructure final : public IPreHashed, public IBo geometryCount = m_triangleGeoms->size(); } - // https://registry.khronos.org/vulkan/specs/latest/man/html/vkGetAccelerationStructureBuildSizesKHR.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 - if (geometryCount == 0) { - if (m_geometryPrimitiveCount && m_geometryPrimitiveCount->size() > 0) return false; + // https://registry.khronos.org/vulkan/specs/latest/man/html/vkGetAccelerationStructureBuildSizesKHR.html#VUID-vkGetAccelerationStructureBuildSizesKHR-pBuildInfo-03619 + if (geometryCount == 0) + { + if (m_geometryPrimitiveCount && m_geometryPrimitiveCount->size() > 0) + return false; } - else + else { - if (!m_geometryPrimitiveCount || m_geometryPrimitiveCount->size() != geometryCount) return false; + if (!m_geometryPrimitiveCount || m_geometryPrimitiveCount->size() != geometryCount) + return false; } return true; } @@ -275,7 +278,7 @@ class ICPUBottomLevelAccelerationStructure final : public IPreHashed, public IBo core::smart_refctd_dynamic_array m_geometryPrimitiveCount = nullptr; core::bitflag m_buildFlags = BUILD_FLAGS::PREFER_FAST_TRACE_BIT; - inline void visitDependents_impl(std::function visit) const override {} + inline void visitDependents_impl(std::function visit) const override {} }; class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelAccelerationStructure @@ -287,14 +290,6 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA // ICPUTopLevelAccelerationStructure() = default; - - // - inline auto& getBuildRangeInfo() - { - assert(isMutable()); - return m_buildRangeInfo; - } - inline auto& getBuildRangeInfo() const {return m_buildRangeInfo;} // inline core::bitflag getBuildFlags() const {return m_buildFlags;} @@ -365,7 +360,6 @@ class 
ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA auto cp = core::make_smart_refctd_ptr(); cp->m_instances = core::make_refctd_dynamic_array>(*m_instances); - cp->m_buildRangeInfo = m_buildRangeInfo; cp->m_buildFlags = m_buildFlags; if (_depth--) @@ -385,9 +379,6 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA if (!m_instances) return false; for (const auto& instance : *m_instances) if (!instance.getBase().blas->valid()) return false; - if (m_buildRangeInfo.instanceCount != m_instances->size()) return false; - // https://registry.khronos.org/vulkan/specs/latest/man/html/VkAccelerationStructureBuildRangeInfoKHR.html#VUID-VkAccelerationStructureBuildRangeInfoKHR-primitiveOffset-03660 - if (m_buildRangeInfo.instanceByteOffset % 16 != 0) return false; return true; } @@ -396,15 +387,16 @@ class ICPUTopLevelAccelerationStructure final : public IAsset, public ITopLevelA private: core::smart_refctd_dynamic_array m_instances = nullptr; - hlsl::acceleration_structures::top_level::BuildRangeInfo m_buildRangeInfo; core::bitflag m_buildFlags = BUILD_FLAGS::PREFER_FAST_BUILD_BIT; - inline void visitDependents_impl(std::function visit) const override - { - if (!m_instances) return; - for (const auto& instance : *m_instances) - if (!visit(instance.getBase().blas.get())) return; - } + inline void visitDependents_impl(std::function visit) const override + { + if (!m_instances) + return; + for (const auto& instance : *m_instances) + if (!visit(instance.getBase().blas.get())) + return; + } }; } From e2f981f4bee5bb69755bc79f6316273f538dd5e7 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 23:11:13 +0100 Subject: [PATCH 19/54] rework the BLAS exports of geometry collections completely --- include/nbl/asset/ICPUGeometryCollection.h | 16 +++--- include/nbl/asset/IGeometryCollection.h | 57 +++++++++++++------- src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp | 2 +- 3 files changed, 47 insertions(+), 28 
deletions(-) diff --git a/include/nbl/asset/ICPUGeometryCollection.h b/include/nbl/asset/ICPUGeometryCollection.h index d231f1df00..9e84de833e 100644 --- a/include/nbl/asset/ICPUGeometryCollection.h +++ b/include/nbl/asset/ICPUGeometryCollection.h @@ -84,17 +84,17 @@ class NBL_API2 ICPUGeometryCollection : public IAsset, public IGeometryCollectio } // - template// requires std::is_same_v()),decltype(ICPUBottomLevelAccelerationStructure::Triangles&)> - inline Iterator exportForBLAS(Iterator out, uint32_t* pWrittenOrdinals=nullptr) const + class CBLASExporter final : public IBLASExporter { - return exportForBLAS(std::forward(out),[](const hlsl::float32_t3x4& lhs, const hlsl::float32_t3x4& rhs)->void + protected: + inline void setTransform(BLASTriangles& out, const uint32_t geomIndex) override { - lhs = rhs; - if (pWrittenOrdinals) - *(pWrittenOrdinals++) = (ptrdiff_t(&rhs)-offsetof(SGeometryReference,transform)-ptrdiff_t(base_t::m_geometries.data()))/sizeof(SGeometryReference); + out.transform = m_geoms[geomIndex].transform; } - ); - } + + public: + inline CBLASExporter(const core::vector& _geoms) : IBLASExporter(_geoms) {} + }; protected: // diff --git a/include/nbl/asset/IGeometryCollection.h b/include/nbl/asset/IGeometryCollection.h index 3d62ad53cf..ffebf81a22 100644 --- a/include/nbl/asset/IGeometryCollection.h +++ b/include/nbl/asset/IGeometryCollection.h @@ -56,7 +56,45 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted inline bool isSkinned() const {return getJointCount()>0;} // View of matrices being the inverse bind pose inline const SDataView& getInverseBindPoseView() const {return m_inverseBindPoseView;} + + + // + class IBLASExporter + { + protected: + using BLASTriangles = IBottomLevelAccelerationStructure::Triangles>; + inline IBLASExporter(const core::vector& _geoms) : m_geoms(_geoms) {} + virtual void setTransform(BLASTriangles& out, const uint32_t geomIndex) = 0; + + const core::vector& m_geoms; + + public: + template // 
requires (std::is_same_v()),decltype(BLASTriangles&)> && PrimCountIter is integral && OrdinalIter is also) + inline TriIter operator()(TriIter outIt, PrimCountIter outPrimCount, uint32_t* pWrittenOrdinals=nullptr) + { + for (const auto& ref : m_geoms) + { + // not a polygon geometry + const auto* geo = ref.geometry.get(); + if (geo->getPrimitiveType()!=IGeometryBase::EPrimitiveType::Polygon) + continue; + const auto ordinal = std::distance(m_geoms.data(),&ref); + const auto* polyGeo = static_cast*>(geo); + *outIt = polyGeo->exportForBLAS(); + if (outIt->vertexData[0]) + { + if (pWrittenOrdinals) + *(pWrittenOrdinals++) = ordinal; + *(outPrimCount++) = polyGeo->getPrimitiveCount(); + if (ref.hasTransform()) + setTransform(*outIt,ordinal); + outIt++; + } + } + return outIt; + } + }; protected: virtual ~IGeometryCollection() = default; @@ -93,25 +131,6 @@ class NBL_API2 IGeometryCollection : public virtual core::IReferenceCounted m_jointAABBView = std::move(jointAABBView); return true; } - - // need to be protected because of the mess around `transform` requires us to provide diffferent signatures for ICPUGeometryCollection and IGPUGeometryCollection - using BLASTriangles = IBottomLevelAccelerationStructure::Triangles>; - template// requires std::is_same_v()),decltype(BLASTriangles&)> - inline Iterator exportForBLAS(Iterator out, Callback& setTransform) const - { - for (const auto& ref : m_geometries) - { - // not a polygon geometry - const auto* geo = ref.geometry.get(); - if (geo->getPrimitiveType()==IGeometryBase::EPrimitiveType::Polygon) - continue; - const auto* polyGeo = static_cast*>(geo); - *out = polyGeo->exportForBLAS(); - if (out->vertexData[0]) - out++; - } - return out; - } // For the entire collection, as always it should NOT include any geometry which is affected by a joint. 
diff --git a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp index 8026235c1e..399a79a2a3 100644 --- a/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp +++ b/src/nbl/ext/MitsubaLoader/CMitsubaLoader.cpp @@ -261,7 +261,7 @@ SAssetBundle CMitsubaLoader::loadAsset(system::IFile* _file, const IAssetLoader: return; } const auto index = instances.size(); - instances.resize(index+1); + instances.resize(index+1,true); instances.getMorphTargets()[index] = core::smart_refctd_ptr(const_cast(targets.get())); // TODO: add materials (incl emission) to the instances /* From 56b5f39a1252e4702d2da5bed21b2377299a4cd6 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 23:27:40 +0100 Subject: [PATCH 20/54] introduce TLAS export --- examples_tests | 2 +- include/nbl/asset/ICPUScene.h | 172 ++++++++++++++++++++++++++++++++-- 2 files changed, 167 insertions(+), 7 deletions(-) diff --git a/examples_tests b/examples_tests index 6ebb966161..85d44671d1 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 6ebb9661618c01e00498c43481f610586200885a +Subproject commit 85d44671d137669ce51d973c8cf76b38dad5a12a diff --git a/include/nbl/asset/ICPUScene.h b/include/nbl/asset/ICPUScene.h index 56a069c469..9d363317de 100644 --- a/include/nbl/asset/ICPUScene.h +++ b/include/nbl/asset/ICPUScene.h @@ -110,11 +110,11 @@ class ICPUScene final : public IAsset, public IScene struct SInstanceStorage final { public: - inline SInstanceStorage(const size_t size=1) : morphTargets(size), materials(size), initialTransforms(size) {} + inline SInstanceStorage(const size_t size=0) : morphTargets(size), materials(size), initialTransforms(size) {} inline void clearInitialTransforms() {initialTransforms.clear();} - inline operator bool() const + explicit inline operator bool() const { if (morphTargets.size()!=materials.size()) return false; @@ -131,11 +131,11 @@ class ICPUScene final : public IAsset, public IScene 
initialTransforms.reserve(newSize); } - inline void resize(const size_t newSize) + inline void resize(const size_t newSize, const bool forceTransformStorage=false) { morphTargets.resize(newSize); materials.resize(newSize,InvalidMaterialTable); - if (!initialTransforms.empty()) + if (forceTransformStorage || !initialTransforms.empty()) initialTransforms.resize(newSize,ICPUGeometryCollection::SGeometryReference{}.transform); } @@ -143,11 +143,12 @@ class ICPUScene final : public IAsset, public IScene { morphTargets.erase(morphTargets.begin()+first,morphTargets.begin()+last); materials.erase(materials.begin()+first, materials.begin()+last); - initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last); + if (!initialTransforms.empty()) + initialTransforms.erase(initialTransforms.begin()+first,initialTransforms.begin()+last); } inline void erase(const size_t ix) {return erase(ix,ix+1);} - inline size_t size() const {return morphTargets.size();} + inline uint64_t size() const {return morphTargets.size();} inline std::span> getMorphTargets() {return morphTargets;} inline std::span> getMorphTargets() const {return morphTargets;} @@ -169,6 +170,165 @@ class ICPUScene final : public IAsset, public IScene // TODO: animations (keyframed transforms, skeleton instance) }; + // utility + class ITLASExporter + { + protected: + using instance_flags_t = asset::ICPUTopLevelAccelerationStructure::INSTANCE_FLAGS; + + inline ITLASExporter(const SInstanceStorage& _storage) : m_storage(_storage) {} + + const SInstanceStorage& m_storage; + + public: + virtual inline ICPUMorphTargets::index_t getTargetIndex(const uint32_t instanceIx) {return ICPUMorphTargets::index_t{0u};} + + virtual inline instance_flags_t getInstanceFlags(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + // TODO: could derive from the material table if we want FORCE_OPAQUE_BIT or FORCE_NO_OPAQUE_BIT but its a whole instance thing + return 
instance_flags_t::TRIANGLE_FACING_CULL_DISABLE_BIT; + } + + virtual inline uint32_t getInstanceIndex(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) {return instanceIx;} + + // default + virtual inline uint32_t getSBTOffset(const material_table_offset_t materialsBeginIndex) + { + return 0; + } + + virtual inline uint8_t getMask(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + return 0xFF; + } + + virtual inline hlsl::float32_t3x4 getTransform(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) + { + if (m_storage.initialTransforms.empty()) + return hlsl::math::linalg::diagonal(1.f); + else + return m_storage.initialTransforms[instanceIx]; + } + + // TODO: when we allow non-polygon geometries in the collection, we need to return a named pair, one BLAS for tris and one for AABBs + virtual core::smart_refctd_ptr getBLAS(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) = 0; + + struct SResult + { + explicit inline operator bool() const {return instances && !instances->empty();} + + core::smart_refctd_dynamic_array instances = nullptr; + bool allInstancesValid = false; + }; + // TODO: SBT stuff + inline SResult operator()() + { + // this is because most GPUs report 16M as max instance count, and there's only 24 bits in `instanceCustomIndex` + constexpr uint64_t MaxInstanceCount = 0x1u<<24; + const uint64_t instanceCount = m_storage.size(); + if (instanceCount>MaxInstanceCount) + return {}; + + std::vector instances; + instances.reserve(instanceCount*2); + bool allInstancesValid = true; + for (auto i=0u; ivalid()) + { + allInstancesValid = false; + continue; + } + const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get(); + ICPUTopLevelAccelerationStructure::StaticInstance inst; + inst.base.blas = getBLAS(i,targetIx); + if (!inst.base.blas) + { + allInstancesValid = false; + continue; + } + inst.transform = getTransform(i,targetIx); + const uint32_t 
customIndex = getInstanceIndex(i,targetIx); + if (customIndex>=MaxInstanceCount) + { + allInstancesValid = false; + continue; + } + inst.base.instanceCustomIndex = customIndex; + inst.base.mask = getMask(i,targetIx); + const auto targetTableOffset = m_storage.materials[i]+targets->getGeometryExclusiveCount(targetIx); + const auto sbtOffset = getSBTOffset(targetTableOffset); + if (sbtOffset>MaxInstanceCount+collection->getGeometries().size()) + { + allInstancesValid = false; + continue; + } + inst.base.instanceShaderBindingTableRecordOffset = sbtOffset; + inst.base.flags = static_cast(getInstanceFlags(i,targetIx)); + instances.emplace_back().instance = std::move(inst); + } + // TODO: adjust BLAS geometry flags according to materials set opaqueness and NO_DUPLICATE_ANY_HIT_INVOCATION_BIT + SResult retval = {.instances=core::make_refctd_dynamic_array(instanceCount),.allInstancesValid=allInstancesValid}; + std::move(instances.begin(),instances.end(),retval.instances->begin()); + return retval; + } + }; + class CDefaultTLASExporter final : public ITLASExporter + { + using triangles_t = ICPUBottomLevelAccelerationStructure::Triangles; + core::vector triangleScratch; + core::vector primitiveCountScratch; + + public: + inline CDefaultTLASExporter(const SInstanceStorage& _storage) : ITLASExporter(_storage) {} + + inline core::smart_refctd_ptr getBLAS(const uint32_t instanceIx, const ICPUMorphTargets::index_t targetIx) override + { + const auto* const targets = m_storage.morphTargets[instanceIx].get(); + const auto* const collection = targets->getTargets()[targetIx.value].geoCollection.get(); + // TODO: use emplace so erase can be faster + auto& entry = m_blasCache[collection]; + if (!entry) + { + entry = core::make_smart_refctd_ptr(); + // + const auto& geometries = collection->getGeometries(); + // deal with triangles + { + triangleScratch.resize(geometries.size()); + primitiveCountScratch.resize(geometries.size()); + const auto usedScratchEnd = 
ICPUGeometryCollection::CBLASExporter(geometries)(triangleScratch.begin(),primitiveCountScratch.data()); + // TODO: report some error that a there was an unsupported geometry + //triangleScratch.end()!=usedScratchEnd + const auto actualGeoCount = std::distance(triangleScratch.begin(),usedScratchEnd); + if (actualGeoCount==0) + { + m_blasCache.erase(m_blasCache.find(collection)); + return nullptr; + } + auto triGeos = core::make_refctd_dynamic_array>(actualGeoCount); + std::move(triangleScratch.begin(),usedScratchEnd,triGeos->begin()); + auto primCounts = core::make_refctd_dynamic_array>(actualGeoCount); + std::copy_n(primitiveCountScratch.data(),actualGeoCount,primCounts->data()); + entry->setGeometries(std::move(triGeos),std::move(primCounts)); + } + using build_f = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS; + // no virtual callbacks because its easy to tell what geometry collection the BLAS came from by looking at the cache after the export + // TODO: Allow Update when we figure out morph targets/skinning + // TODO: GEOMETRY_TYPE_IS_AABB_BIT for non-polygon geometry collections + entry->setBuildFlags(build_f::PREFER_FAST_TRACE_BIT|build_f::ALLOW_COMPACTION_BIT); + entry->setContentHash(entry->computeContentHash()); + } + return entry; + } + + // when doing animations, it good to copy and reuse this with dummy BLASes but where content hashes are already the same + core::unordered_map> m_blasCache; + }; + // inline SInstanceStorage& getInstances() {return m_instances;} inline const SInstanceStorage& getInstances() const {return m_instances;} From 23ae60a291c161d74cf2a846d567ebcb2c59d6d7 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 20 Feb 2026 23:44:02 +0100 Subject: [PATCH 21/54] fix a compiler warning --- include/nbl/builtin/hlsl/cpp_compat/promote.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 
1887f4b51f..a34acf2507 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -29,7 +29,7 @@ struct Promote && (concepts array_set setter; To output; [[unroll]] - for (int i = 0; i < vector_traits::Dimension; ++i) + for (uint32_t i=0; i::Dimension; ++i) setter(output, i, v); return output; } From 07cefd03c015db63b355cc3009eb168f8ef44e8d Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Sat, 21 Feb 2026 00:48:21 +0100 Subject: [PATCH 22/54] wait for a better solution from Arkadiusz --- include/nbl/system/IApplicationFramework.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index f17859ebb2..d91151a2db 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -56,7 +56,7 @@ class IApplicationFramework : public core::IReferenceCounted #else // Build-interface binaries select lookup mode at runtime via NBL_RUN_FROM_BUILD_INTERFACE. // This is required because the same host-built executable can later be run from an install package. 
- const bool useInstallLookups = !readEnvFlag("NBL_RUN_FROM_BUILD_INTERFACE"); + const bool useInstallLookups = false;// !readEnvFlag("NBL_RUN_FROM_BUILD_INTERFACE"); #endif // NBL_RELOCATABLE_PACKAGE constexpr struct From 64107fe7e0024f9af4a41cf7a9efc08730e98e55 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Sat, 21 Feb 2026 01:51:50 +0100 Subject: [PATCH 23/54] silence a few warnings --- include/nbl/builtin/hlsl/math/linalg/basic.hlsl | 2 +- src/nbl/ext/MitsubaLoader/CElementBSDF.cpp | 5 ++++- src/nbl/ext/MitsubaLoader/CElementEmitter.cpp | 3 +++ src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp | 4 +++- src/nbl/ext/MitsubaLoader/CElementSensor.cpp | 5 +++-- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/nbl/builtin/hlsl/math/linalg/basic.hlsl b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl index 15b9014998..64f923a521 100644 --- a/include/nbl/builtin/hlsl/math/linalg/basic.hlsl +++ b/include/nbl/builtin/hlsl/math/linalg/basic.hlsl @@ -42,7 +42,7 @@ MatT identity() } template truncate(const NBL_CONST_REF_ARG(matrix) inMatrix) +inline matrix truncate(NBL_CONST_REF_ARG(matrix) inMatrix) { matrix retval; diff --git a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp index 4117ca5f3a..5d988b614c 100644 --- a/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementBSDF.cpp @@ -12,6 +12,8 @@ #include +#pragma warning( push ) +#pragma warning( disable : 5103 ) namespace nbl::ext::MitsubaLoader { namespace impl @@ -422,4 +424,5 @@ bool CElementBSDF::onEndTag(CMitsubaMetadata* globalMetadata, system::logger_opt return true; } -} \ No newline at end of file +} +#pragma warning( pop ) \ No newline at end of file diff --git a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp index 93b77b4c3a..d179235f54 100644 --- a/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementEmitter.cpp @@ -21,6 +21,8 
@@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap using this_t = CElementEmitter; AddPropertyMap retval; +#pragma warning( push ) +#pragma warning( disable : 5103 ) // funky transform setting NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY("position",POINT) { @@ -116,6 +118,7 @@ auto CElementEmitter::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(gamma,FLOAT,std::is_same,EnvMap); #undef ADD_SPECTRUM +#pragma warning( pop ) return retval; } diff --git a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp index f99b4487f9..859730167a 100644 --- a/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp +++ b/src/nbl/ext/MitsubaLoader/CElementIntegrator.cpp @@ -41,6 +41,8 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap retval; +#pragma warning( push ) +#pragma warning( disable : 5103 ) // common // this one has really funny legacy behaviour which Mitsuba allowed contrary to its PDF docs NBL_EXT_MITSUBA_LOADER_REGISTER_ADD_PROPERTY_CONSTRAINED("shadingSamples",INTEGER,is_any_of,AmbientOcclusion,DirectIllumination) @@ -200,7 +202,7 @@ auto CElementIntegrator::compAddPropertyMap() -> AddPropertyMap AddPropertyMap { using this_t = CElementSensor; AddPropertyMap retval; - +#pragma warning( push ) +#pragma warning( disable : 5103 ) NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(up,VECTOR,derived_from,ShutterSensor); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftX,FLOAT,derived_from,PerspectivePinhole); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(shiftY,FLOAT,derived_from,PerspectivePinhole); @@ -50,7 +51,7 @@ auto CElementSensor::compAddPropertyMap() -> AddPropertyMap NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(farClip,FLOAT,derived_from,CameraBase); 
NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(focusDistance,FLOAT,derived_from,DepthOfFieldBase); NBL_EXT_MITSUBA_LOADER_REGISTER_SIMPLE_ADD_VARIANT_PROPERTY_CONSTRAINED(apertureRadius,FLOAT,derived_from,DepthOfFieldBase); - +#pragma warning( pop ) // special auto setClipPlane = [](this_t* _this, SNamedPropertyElement&& _property, const system::logger_opt_ptr logger)->bool { From 4466eddace8eccb51468742ced756f8e37b45519 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 04:56:20 +0100 Subject: [PATCH 24/54] Guard install lookup paths with package defines --- include/nbl/system/IApplicationFramework.h | 172 ++++++++++++++------- 1 file changed, 112 insertions(+), 60 deletions(-) diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index f17859ebb2..bb4c99e4ed 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -25,87 +25,139 @@ class IApplicationFramework : public core::IReferenceCounted // this is safe to call multiple times static bool GlobalsInit() { - // TODO: update CMake and rename "DLL" in all of those defines here to "MODULE" or "RUNTIME" - auto resolveDir = [](const char* value) + struct Interface { - if (!value || (value[0] == '\0')) - return system::path(""); - - const auto candidate = system::path(value); - if (std::filesystem::exists(candidate)) - return candidate; - - return system::path(""); + system::path install; + system::path build; }; - auto readEnvFlag = [](const char* key) + struct Module { - const char* value = std::getenv(key); - if (!value || (value[0] == '\0')) - return false; - - const std::string_view v(value); - return (v != "0") && (v != "false") && (v != "off") && (v != "no"); + Interface paths; + std::string_view name; }; - const auto sdk = resolveDir(std::getenv("NBL_INSTALL_DIRECTORY")); - - #ifdef NBL_RELOCATABLE_PACKAGE - // Relocatable package consumers must use install lookups 
only. - const bool useInstallLookups = true; - #else - // Build-interface binaries select lookup mode at runtime via NBL_RUN_FROM_BUILD_INTERFACE. - // This is required because the same host-built executable can later be run from an install package. - const bool useInstallLookups = !readEnvFlag("NBL_RUN_FROM_BUILD_INTERFACE"); - #endif // NBL_RELOCATABLE_PACKAGE - - constexpr struct - { - std::string_view nabla, dxc; - } module = - { + Module nabla{ + {}, #ifdef _NBL_SHARED_BUILD_ _NABLA_DLL_NAME_ #else "" #endif - , - "dxcompiler" }; - struct - { - system::path nabla, dxc; - } install, env, build, rel; - - #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) && defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY) - - #if defined(_NABLA_INSTALL_DIR_) - install.nabla = std::filesystem::absolute(system::path(_NABLA_INSTALL_DIR_) / NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY); - install.dxc = std::filesystem::absolute(system::path(_NABLA_INSTALL_DIR_) / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY); - #endif - - //! ABS key is full key to file inside relocatable package - env.nabla = sdk / NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; - env.dxc = sdk / NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY; - #endif + Module dxc{ + {}, + "dxcompiler" + }; #ifdef _NBL_SHARED_BUILD_ #if defined(_NABLA_OUTPUT_DIR_) - build.nabla = _NABLA_OUTPUT_DIR_; + nabla.paths.build = _NABLA_OUTPUT_DIR_; #endif #endif #if defined(_DXC_DLL_) - build.dxc = path(_DXC_DLL_).parent_path(); + dxc.paths.build = path(_DXC_DLL_).parent_path(); #endif - //! 
consumer can set this as relative path between exe & DLLs - #ifdef NBL_CPACK_PACKAGE_NABLA_DLL_DIR - rel.nabla = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; + // There must be no mix between interfaces' lookup, we detect our packate layout + // to determine whether its install prefix or host build tree execution + + #ifdef NBL_RELOCATABLE_PACKAGE + const bool useInstallLookups = true; + #else + auto getExecutableDirectory = []() -> system::path + { + #if defined(_NBL_PLATFORM_WINDOWS_) + wchar_t modulePath[MAX_PATH] = {}; + const auto length = GetModuleFileNameW(nullptr, modulePath, MAX_PATH); + if ((length == 0) || (length >= MAX_PATH)) + return system::path(""); + return std::filesystem::path(modulePath).parent_path(); + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + std::error_code ec; + const auto executablePath = std::filesystem::read_symlink("/proc/self/exe", ec); + if (ec) + return system::path(""); + return executablePath.parent_path(); + #else + return system::path(""); + #endif + }; + const auto executableDirectory = getExecutableDirectory(); + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) + const auto nablaRelDir = system::path(NBL_CPACK_PACKAGE_NABLA_DLL_DIR); + nabla.paths.install = std::filesystem::absolute(executableDirectory / nablaRelDir); #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) + const auto dxcRelDir = system::path(NBL_CPACK_PACKAGE_DXC_DLL_DIR); + dxc.paths.install = std::filesystem::absolute(executableDirectory / dxcRelDir); + #endif + + const auto detectPackageLayout = [&nabla, &dxc]() + { + auto moduleExistsInDir = [](const system::path& dir, std::string_view moduleName) + { + if (dir.empty() || moduleName.empty() || !std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) + return false; + + const std::string baseName(moduleName); + auto hasRegularFile = [&dir](const std::string& fileName) + { + const auto filePath = dir / fileName; + return std::filesystem::exists(filePath) && 
std::filesystem::is_regular_file(filePath); + }; + + if (hasRegularFile(baseName)) + return true; + + #if defined(_NBL_PLATFORM_WINDOWS_) + if (hasRegularFile(baseName + ".dll")) + return true; + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + if (hasRegularFile(baseName + ".so")) + return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + const std::string libBaseName = hasLibPrefix ? baseName : ("lib" + baseName); + if (hasRegularFile(libBaseName + ".so")) + return true; + + const std::string versionedPrefix = libBaseName + ".so."; + std::error_code ec; + for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) + { + if (ec) + break; + if (!entry.is_regular_file(ec)) + continue; + + const auto fileName = entry.path().filename().string(); + if (fileName.rfind(versionedPrefix, 0) == 0) + return true; + } + #elif defined(__APPLE__) + if (hasRegularFile(baseName + ".dylib")) + return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + if (!hasLibPrefix && hasRegularFile("lib" + baseName + ".dylib")) + return true; + #endif + + return false; + }; + + const bool hasPackageDxc = moduleExistsInDir(dxc.paths.install, dxc.name); + #ifdef _NBL_SHARED_BUILD_ + const bool hasPackageNabla = moduleExistsInDir(nabla.paths.install, nabla.name); + return hasPackageDxc && hasPackageNabla; + #else + return hasPackageDxc; + #endif + }; - #ifdef NBL_CPACK_PACKAGE_DXC_DLL_DIR - rel.dxc = NBL_CPACK_PACKAGE_DXC_DLL_DIR; + const bool useInstallLookups = detectPackageLayout(); #endif using RV = const std::vector; @@ -132,11 +184,11 @@ class IApplicationFramework : public core::IReferenceCounted return true; }; - if (not load(module.dxc, useInstallLookups ? RV{ rel.dxc, env.dxc, install.dxc } : RV{ build.dxc })) + if (not load(dxc.name, useInstallLookups ? RV{ dxc.paths.install } : RV{ dxc.paths.build })) return false; #ifdef _NBL_SHARED_BUILD_ - if (not load(module.nabla, useInstallLookups ? 
RV{ rel.nabla, env.nabla, install.nabla } : RV{ build.nabla })) + if (not load(nabla.name, useInstallLookups ? RV{ nabla.paths.install } : RV{ nabla.paths.build })) return false; #endif From 1c456efcb44b22bc75d3017cb0c302c298ca6c73 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 05:39:41 +0100 Subject: [PATCH 25/54] Remove NSC build interface env wrapper --- cmake/common.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/common.cmake b/cmake/common.cmake index dbe30dc3d7..48a4098d97 100755 --- a/cmake/common.cmake +++ b/cmake/common.cmake @@ -1535,7 +1535,6 @@ namespace @IMPL_NAMESPACE@ { endif() set(NBL_NSC_COMPILE_COMMAND - "${CMAKE_COMMAND}" -E env "NBL_RUN_FROM_BUILD_INTERFACE=$<$>>:1>" "$" -Fc "${TARGET_OUTPUT}" ${COMPILE_OPTIONS} ${REQUIRED_OPTIONS} ${IMPL_COMMON_OPTIONS} From b64184a14d18cff4bc4a8d9f4297affb2251ca30 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 13:44:10 +0100 Subject: [PATCH 26/54] Refactor runtime module lookup and smoke override tests --- include/nbl/system/CSystemWin32.h | 15 +- include/nbl/system/IApplicationFramework.h | 169 +++-------------- include/nbl/system/ModuleLookupUtils.h | 114 ++++++++++++ include/nbl/system/RuntimeModuleLookup.h | 201 +++++++++++++++++++++ smoke/CMakeLists.txt | 33 +++- smoke/main.cpp | 12 +- smoke/run_override_test.cmake | 108 +++++++++++ 7 files changed, 491 insertions(+), 161 deletions(-) create mode 100644 include/nbl/system/ModuleLookupUtils.h create mode 100644 include/nbl/system/RuntimeModuleLookup.h create mode 100644 smoke/run_override_test.cmake diff --git a/include/nbl/system/CSystemWin32.h b/include/nbl/system/CSystemWin32.h index 01766ddaa8..7c73525c43 100644 --- a/include/nbl/system/CSystemWin32.h +++ b/include/nbl/system/CSystemWin32.h @@ -2,9 +2,12 @@ #define _NBL_SYSTEM_C_SYSTEM_WIN32_H_INCLUDED_ #include "nbl/system/ISystem.h" +#include "nbl/system/ModuleLookupUtils.h" #ifdef _NBL_PLATFORM_WINDOWS_ +#ifndef WIN32_LEAN_AND_MEAN 
#define WIN32_LEAN_AND_MEAN +#endif #include #include @@ -51,13 +54,7 @@ class NBL_API2 CSystemWin32 : public ISystem #endif ; // legal & on purpose - const auto executableDirectory = []() -> std::filesystem::path - { - wchar_t path[MAX_PATH] = { 0 }; - GetModuleFileNameW(NULL, path, MAX_PATH); - - return std::filesystem::path(path).parent_path(); - }(); + const auto exeDirectory = executableDirectory(); // load from right next to the executable (always be able to override like this) HMODULE res = LoadLibraryExA(dllName, NULL, LOAD_LIBRARY_SEARCH_APPLICATION_DIR); @@ -80,7 +77,7 @@ class NBL_API2 CSystemWin32 : public ISystem // then relative to the executable's directory { - const auto path = std::filesystem::absolute(executableDirectory / requestModulePath).string(); + const auto path = std::filesystem::absolute(exeDirectory / requestModulePath).string(); if (logRequests) printf("[INFO]: Requesting \"%s\" module load with \"%s\" search path...\n", dllName, path.c_str()); @@ -124,4 +121,4 @@ class NBL_API2 CSystemWin32 : public ISystem #endif -#endif \ No newline at end of file +#endif diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index bb4c99e4ed..82da25cfb5 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -15,6 +15,7 @@ #include "nbl/system/CSystemAndroid.h" #include "nbl/system/CSystemLinux.h" #include "nbl/system/CSystemWin32.h" +#include "nbl/system/RuntimeModuleLookup.h" namespace nbl::system { @@ -25,143 +26,33 @@ class IApplicationFramework : public core::IReferenceCounted // this is safe to call multiple times static bool GlobalsInit() { - struct Interface - { - system::path install; - system::path build; - }; - - struct Module - { - Interface paths; - std::string_view name; - }; - - Module nabla{ - {}, - #ifdef _NBL_SHARED_BUILD_ - _NABLA_DLL_NAME_ - #else - "" - #endif - }; - - Module dxc{ - {}, - "dxcompiler" - }; - - #ifdef 
_NBL_SHARED_BUILD_ - #if defined(_NABLA_OUTPUT_DIR_) - nabla.paths.build = _NABLA_OUTPUT_DIR_; - #endif - #endif - #if defined(_DXC_DLL_) - dxc.paths.build = path(_DXC_DLL_).parent_path(); - #endif - - // There must be no mix between interfaces' lookup, we detect our packate layout - // to determine whether its install prefix or host build tree execution - - #ifdef NBL_RELOCATABLE_PACKAGE - const bool useInstallLookups = true; - #else - auto getExecutableDirectory = []() -> system::path - { - #if defined(_NBL_PLATFORM_WINDOWS_) - wchar_t modulePath[MAX_PATH] = {}; - const auto length = GetModuleFileNameW(nullptr, modulePath, MAX_PATH); - if ((length == 0) || (length >= MAX_PATH)) - return system::path(""); - return std::filesystem::path(modulePath).parent_path(); - #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) - std::error_code ec; - const auto executablePath = std::filesystem::read_symlink("/proc/self/exe", ec); - if (ec) - return system::path(""); - return executablePath.parent_path(); - #else - return system::path(""); - #endif - }; - const auto executableDirectory = getExecutableDirectory(); - #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) - const auto nablaRelDir = system::path(NBL_CPACK_PACKAGE_NABLA_DLL_DIR); - nabla.paths.install = std::filesystem::absolute(executableDirectory / nablaRelDir); - #endif - #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) - const auto dxcRelDir = system::path(NBL_CPACK_PACKAGE_DXC_DLL_DIR); - dxc.paths.install = std::filesystem::absolute(executableDirectory / dxcRelDir); - #endif - - const auto detectPackageLayout = [&nabla, &dxc]() - { - auto moduleExistsInDir = [](const system::path& dir, std::string_view moduleName) - { - if (dir.empty() || moduleName.empty() || !std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) - return false; - - const std::string baseName(moduleName); - auto hasRegularFile = [&dir](const std::string& fileName) - { - const auto filePath = dir / fileName; - return 
std::filesystem::exists(filePath) && std::filesystem::is_regular_file(filePath); - }; - - if (hasRegularFile(baseName)) - return true; - - #if defined(_NBL_PLATFORM_WINDOWS_) - if (hasRegularFile(baseName + ".dll")) - return true; - #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) - if (hasRegularFile(baseName + ".so")) - return true; - - const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); - const std::string libBaseName = hasLibPrefix ? baseName : ("lib" + baseName); - if (hasRegularFile(libBaseName + ".so")) - return true; - - const std::string versionedPrefix = libBaseName + ".so."; - std::error_code ec; - for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) - { - if (ec) - break; - if (!entry.is_regular_file(ec)) - continue; - - const auto fileName = entry.path().filename().string(); - if (fileName.rfind(versionedPrefix, 0) == 0) - return true; - } - #elif defined(__APPLE__) - if (hasRegularFile(baseName + ".dylib")) - return true; - - const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); - if (!hasLibPrefix && hasRegularFile("lib" + baseName + ".dylib")) - return true; - #endif - - return false; - }; - - const bool hasPackageDxc = moduleExistsInDir(dxc.paths.install, dxc.name); - #ifdef _NBL_SHARED_BUILD_ - const bool hasPackageNabla = moduleExistsInDir(nabla.paths.install, nabla.name); - return hasPackageDxc && hasPackageNabla; - #else - return hasPackageDxc; - #endif - }; - - const bool useInstallLookups = detectPackageLayout(); - #endif - - using RV = const std::vector; - auto load = [](std::string_view moduleName, const RV& searchPaths) + RuntimeModuleLookup lookup; + + const auto exeDirectory = system::executableDirectory(); + lookup.applyInstallOverrides(exeDirectory); + /* + In the current design build interface and install interface cannot share one lookup set. + + Build lookup may point to host-only output folders while install lookup must stay relocatable. 
+ Mixing them can load stale modules from host build trees and break packaged consumers. + Another big issue is Nabla build-system layout because runtime binaries are emitted into + source-side locations instead of a binary-tree runtime prefix that mirrors install layout. + This makes executable-relative lookup ambiguous and forces a split between build and install lookup modes. + There are more issues caused by this non-unified layout than the ones handled in this file. + + Desired end state is that build outputs follow the same relative runtime layout as install so lookup can stay install-style + for both host build and package consumers while still allowing consumer override paths like "./Libraries". + No interface should expose any define that contains an absolute path. + All binaries must be emitted into the build directory and Nabla + should remain fully buildable with a read-only source filesystem. + + I cannot address all of that here because it requires a broader Nabla build-system refactor. + */ + const bool useInstallLookups = lookup.chooseInstallLookupMode(exeDirectory); + lookup.finalizeInstallLookups(useInstallLookups); + + using SearchPaths = std::vector; + const auto load = [](std::string_view moduleName, const SearchPaths& searchPaths) { #ifdef _NBL_PLATFORM_WINDOWS_ const bool isAlreadyLoaded = GetModuleHandleA(moduleName.data()); @@ -184,11 +75,11 @@ class IApplicationFramework : public core::IReferenceCounted return true; }; - if (not load(dxc.name, useInstallLookups ? RV{ dxc.paths.install } : RV{ dxc.paths.build })) + if (not load(lookup.dxc.name, useInstallLookups ? SearchPaths{ lookup.dxc.paths.install } : SearchPaths{ lookup.dxc.paths.build })) return false; #ifdef _NBL_SHARED_BUILD_ - if (not load(nabla.name, useInstallLookups ? RV{ nabla.paths.install } : RV{ nabla.paths.build })) + if (not load(lookup.nabla.name, useInstallLookups ? 
SearchPaths{ lookup.nabla.paths.install } : SearchPaths{ lookup.nabla.paths.build })) return false; #endif diff --git a/include/nbl/system/ModuleLookupUtils.h b/include/nbl/system/ModuleLookupUtils.h new file mode 100644 index 0000000000..c763cc8e30 --- /dev/null +++ b/include/nbl/system/ModuleLookupUtils.h @@ -0,0 +1,114 @@ +#ifndef _NBL_SYSTEM_MODULE_LOOKUP_UTILS_H_INCLUDED_ +#define _NBL_SYSTEM_MODULE_LOOKUP_UTILS_H_INCLUDED_ + +#include "nbl/system/path.h" + +#include +#include +#include +#include + +#if defined(_NBL_PLATFORM_WINDOWS_) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#endif + +namespace nbl::system +{ +inline bool moduleExistsInDirectory(const system::path& dir, std::string_view moduleName) +{ + if (dir.empty() || moduleName.empty() || !std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) + return false; + + const std::string baseName(moduleName); + const auto hasRegularFile = [&dir](const std::string& fileName) + { + const auto filePath = dir / fileName; + return std::filesystem::exists(filePath) && std::filesystem::is_regular_file(filePath); + }; + + if (hasRegularFile(baseName)) + return true; + + #if defined(_NBL_PLATFORM_WINDOWS_) + if (hasRegularFile(baseName + ".dll")) + return true; + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + if (hasRegularFile(baseName + ".so")) + return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + const std::string libBaseName = hasLibPrefix ? 
baseName : ("lib" + baseName); + if (hasRegularFile(libBaseName + ".so")) + return true; + + const std::string versionedPrefix = libBaseName + ".so."; + std::error_code ec; + for (const auto& entry : std::filesystem::directory_iterator(dir, ec)) + { + if (ec) + break; + if (!entry.is_regular_file(ec)) + continue; + + const auto fileName = entry.path().filename().string(); + if (fileName.rfind(versionedPrefix, 0) == 0) + return true; + } + #elif defined(__APPLE__) + if (hasRegularFile(baseName + ".dylib")) + return true; + + const bool hasLibPrefix = (baseName.rfind("lib", 0) == 0); + if (!hasLibPrefix && hasRegularFile("lib" + baseName + ".dylib")) + return true; + #endif + + return false; +} + +inline system::path executableDirectory() +{ + #if defined(_NBL_PLATFORM_WINDOWS_) + wchar_t modulePath[MAX_PATH] = {}; + const auto length = GetModuleFileNameW(nullptr, modulePath, MAX_PATH); + if ((length == 0) || (length >= MAX_PATH)) + return system::path(""); + return std::filesystem::path(modulePath).parent_path(); + #elif defined(_NBL_PLATFORM_LINUX_) || defined(_NBL_PLATFORM_ANDROID_) + std::error_code ec; + const auto executablePath = std::filesystem::read_symlink("/proc/self/exe", ec); + if (ec) + return system::path(""); + return executablePath.parent_path(); + #else + return system::path(""); + #endif +} + +inline system::path loadedModuleDirectory(std::string_view moduleName) +{ + #if defined(_NBL_PLATFORM_WINDOWS_) + if (moduleName.empty()) + return system::path(""); + + const auto moduleHandle = GetModuleHandleA(moduleName.data()); + if (moduleHandle == nullptr) + return system::path(""); + + wchar_t modulePath[MAX_PATH] = {}; + const auto length = GetModuleFileNameW(moduleHandle, modulePath, MAX_PATH); + if ((length == 0) || (length >= MAX_PATH)) + return system::path(""); + + return std::filesystem::path(modulePath).parent_path(); + #else + // TODO: implement loaded module directory lookup for non-Windows platforms. 
+ return system::path(""); + #endif +} +} + +#endif diff --git a/include/nbl/system/RuntimeModuleLookup.h b/include/nbl/system/RuntimeModuleLookup.h new file mode 100644 index 0000000000..8268cd7e55 --- /dev/null +++ b/include/nbl/system/RuntimeModuleLookup.h @@ -0,0 +1,201 @@ +#ifndef _NBL_SYSTEM_RUNTIME_MODULE_LOOKUP_H_INCLUDED_ +#define _NBL_SYSTEM_RUNTIME_MODULE_LOOKUP_H_INCLUDED_ + +#include "nbl/system/ModuleLookupUtils.h" + +namespace nbl::system +{ +struct RuntimeModuleLookup final +{ + struct LookupPaths + { + system::path install; + system::path build; + }; + + struct Module + { + LookupPaths paths; + std::string_view name = ""; + std::string_view buildOutputDir = ""; + std::string_view buildDllPath = ""; + std::string_view installOverrideRel = ""; + std::string_view runtimeAbsKey = ""; + }; + + bool sharedBuild = false; + bool relocatablePackage = false; + Module nabla; + Module dxc; + + RuntimeModuleLookup() + { + dxc.name = "dxcompiler"; + #if defined(_NBL_SHARED_BUILD_) + sharedBuild = true; + nabla.name = _NABLA_DLL_NAME_; + #endif + #if defined(NBL_RELOCATABLE_PACKAGE) + relocatablePackage = true; + #endif + #if defined(_NABLA_OUTPUT_DIR_) + nabla.buildOutputDir = _NABLA_OUTPUT_DIR_; + #endif + #if defined(_DXC_DLL_) + dxc.buildDllPath = _DXC_DLL_; + #endif + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) + nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) + dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR; + #endif + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) + nabla.runtimeAbsKey = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY) + dxc.runtimeAbsKey = NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY; + #endif + + applyBuildInterfacePaths(); + } + + inline void applyInstallOverrides(const system::path& exeDirectory) + { + if (hasInstallOverride(nabla)) + nabla.paths.install = absoluteFromExe(exeDirectory, 
nabla.installOverrideRel); + if (hasInstallOverride(dxc)) + dxc.paths.install = absoluteFromExe(exeDirectory, dxc.installOverrideRel); + } + + inline bool chooseInstallLookupMode(const system::path& exeDirectory) + { + if (relocatablePackage) + { + if (!hasCompleteInstallOverride()) + tryResolveInstallPathsFromPackageLayout(exeDirectory); + return true; + } + return hasUsableInstallPaths() || tryResolveInstallPathsFromPackageLayout(exeDirectory); + } + + inline void finalizeInstallLookups(bool useInstallLookups) + { + if (!useInstallLookups) + return; + #if defined(_NBL_PLATFORM_WINDOWS_) && defined(_NBL_SHARED_BUILD_) + if (nabla.paths.install.empty()) + nabla.paths.install = loadedModuleDirectory(nabla.name); + #endif + resolveDxcInstallPathFromLoadedNabla(useInstallLookups); + } + + private: + static inline bool hasInstallOverride(const Module& module) + { + return !module.installOverrideRel.empty(); + } + + static inline bool hasRuntimeAbsKey(const Module& module) + { + return !module.runtimeAbsKey.empty(); + } + + inline void applyBuildInterfacePaths() + { + if (sharedBuild && !nabla.buildOutputDir.empty()) + nabla.paths.build = system::path(nabla.buildOutputDir); + if (!dxc.buildDllPath.empty()) + dxc.paths.build = system::path(dxc.buildDllPath).parent_path(); + } + + static inline system::path absoluteFromExe(const system::path& exeDirectory, std::string_view relativePath) + { + if (relativePath.empty() || exeDirectory.empty()) + return system::path(""); + return std::filesystem::absolute(exeDirectory / system::path(relativePath)); + } + + inline bool hasUsableInstallPaths() const + { + if (!moduleExistsInDirectory(dxc.paths.install, dxc.name)) + return false; + return !sharedBuild || moduleExistsInDirectory(nabla.paths.install, nabla.name); + } + + inline bool tryResolveInstallPathsFromPrefix(const system::path& candidatePrefix) + { + if (candidatePrefix.empty()) + return false; + if (!hasRuntimeAbsKey(nabla) && !hasRuntimeAbsKey(dxc)) + return false; + + 
Module candidateNabla = nabla; + Module candidateDxc = dxc; + + if (hasRuntimeAbsKey(nabla)) + candidateNabla.paths.install = std::filesystem::absolute(candidatePrefix / system::path(nabla.runtimeAbsKey)); + if (hasRuntimeAbsKey(dxc)) + candidateDxc.paths.install = std::filesystem::absolute(candidatePrefix / system::path(dxc.runtimeAbsKey)); + + if (!moduleExistsInDirectory(candidateDxc.paths.install, candidateDxc.name)) + return false; + if (sharedBuild && !moduleExistsInDirectory(candidateNabla.paths.install, candidateNabla.name)) + return false; + + nabla.paths.install = candidateNabla.paths.install; + dxc.paths.install = candidateDxc.paths.install; + return true; + } + + inline bool tryResolveInstallPathsFromPackageLayout(const system::path& lookupStartDirectory) + { + if (lookupStartDirectory.empty()) + return false; + if (!hasRuntimeAbsKey(nabla) && !hasRuntimeAbsKey(dxc)) + return false; + + auto candidatePrefix = std::filesystem::absolute(lookupStartDirectory); + while (!candidatePrefix.empty()) + { + if (tryResolveInstallPathsFromPrefix(candidatePrefix)) + return true; + + const auto parent = candidatePrefix.parent_path(); + if (parent == candidatePrefix) + break; + candidatePrefix = parent; + } + return false; + } + + inline bool hasCompleteInstallOverride() const + { + return sharedBuild ? (hasInstallOverride(nabla) && hasInstallOverride(dxc)) : hasInstallOverride(dxc); + } + + #if defined(_NBL_PLATFORM_WINDOWS_) + inline void resolveDxcInstallPathFromLoadedNabla(bool useInstallLookups) + { + if (!useInstallLookups || !dxc.paths.install.empty()) + return; + if (!(sharedBuild && !nabla.runtimeAbsKey.empty() && !dxc.runtimeAbsKey.empty())) + return; + + const auto nablaRuntimeDir = !nabla.paths.install.empty() ? 
nabla.paths.install : loadedModuleDirectory(nabla.name); + if (nablaRuntimeDir.empty()) + return; + + const auto dxcRelToNabla = system::path(dxc.runtimeAbsKey).lexically_relative(system::path(nabla.runtimeAbsKey)); + if (!dxcRelToNabla.empty() && dxcRelToNabla != system::path(".")) + dxc.paths.install = std::filesystem::absolute(nablaRuntimeDir / dxcRelToNabla); + } + #else + inline void resolveDxcInstallPathFromLoadedNabla(bool) + { + } + #endif +}; +} + +#endif diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index c560e56a0f..34ed41965f 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -31,13 +31,26 @@ target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) +add_executable(smoke_override main.cpp pch.hpp cdb.ps1) +target_link_libraries(smoke_override PRIVATE Nabla::Nabla) +target_compile_definitions(smoke_override PRIVATE + _AFXDLL + "NBL_CPACK_PACKAGE_NABLA_DLL_DIR=\"./Libraries\"" + "NBL_CPACK_PACKAGE_DXC_DLL_DIR=\"./Libraries\"" +) +target_precompile_headers(smoke_override PRIVATE pch.hpp) +set_target_properties(smoke smoke_override PROPERTIES + RUNTIME_OUTPUT_DIRECTORY_DEBUG "${Nabla_ROOT}/debug/bin" + RUNTIME_OUTPUT_DIRECTORY_RELEASE "${Nabla_ROOT}/bin" + RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${Nabla_ROOT}/relwithdebinfo/bin" +) + set(CMAKE_CTEST_ARGUMENTS --verbose) enable_testing() set(OPTS NBL_EXPLICIT_MODULE_LOAD_LOG=1 NBL_EXPLICIT_MODULE_REQUEST_LOG=1 - NBL_INSTALL_DIRECTORY=${Nabla_ROOT} ) option(ENABLE_CRASH_HANDLER "Enable crash handler" ON) @@ -59,4 +72,20 @@ if(NOT ENABLE_CRASH_HANDLER) endif() add_test(NAME NBL_INSTALL_LOAD_API COMMAND ${CMD}) -set_tests_properties(NBL_INSTALL_LOAD_API PROPERTIES ENVIRONMENT "${OPTS}") \ No newline at end of file +set_tests_properties(NBL_INSTALL_LOAD_API PROPERTIES ENVIRONMENT "${OPTS}") + +set(_SMOKE_CPACK_REL_ENTRY "./$<$,Release>>:$>>") +set(_SMOKE_NABLA_RUNTIME_DIR 
"${Nabla_ROOT}/${_SMOKE_CPACK_REL_ENTRY}/runtime/nbl") +set(_SMOKE_DXC_RUNTIME_DIR "${_SMOKE_NABLA_RUNTIME_DIR}/3rdparty/dxc") +set(_SMOKE_OVERRIDE_LIB_DIR "$/Libraries") + +add_test(NAME NBL_INSTALL_LOAD_API_OVERRIDE + COMMAND "${CMAKE_COMMAND}" + "-DSMOKE_EXE=$" + "-DNABLA_RUNTIME_DIR=${_SMOKE_NABLA_RUNTIME_DIR}" + "-DDXC_RUNTIME_DIR=${_SMOKE_DXC_RUNTIME_DIR}" + "-DDXC_MODULE_BASENAME=dxcompiler" + "-DOVERRIDE_DIR=${_SMOKE_OVERRIDE_LIB_DIR}" + "-P" "$" +) +set_tests_properties(NBL_INSTALL_LOAD_API_OVERRIDE PROPERTIES ENVIRONMENT "${OPTS}") diff --git a/smoke/main.cpp b/smoke/main.cpp index 530b29adae..9510081f21 100644 --- a/smoke/main.cpp +++ b/smoke/main.cpp @@ -18,16 +18,6 @@ class Smoke final : public system::IApplicationFramework bool onAppInitialized(smart_refctd_ptr&& system) override { - const char* sdk = std::getenv("NBL_INSTALL_DIRECTORY"); - - if (sdk) - { - auto dir = std::filesystem::absolute(std::filesystem::path(sdk).make_preferred()).string(); - std::cout << "[INFO]: NBL_INSTALL_DIRECTORY = \"" << dir.c_str() << "\"\n"; - } - else - std::cerr << "[INFO]: NBL_INSTALL_DIRECTORY env was not defined!\n"; - if (isAPILoaded()) { std::cout << "[INFO]: Loaded Nabla API\n"; @@ -123,4 +113,4 @@ class Smoke final : public system::IApplicationFramework NBL_MAIN_FUNC(Smoke) #else int main() { return 0; } -#endif \ No newline at end of file +#endif diff --git a/smoke/run_override_test.cmake b/smoke/run_override_test.cmake new file mode 100644 index 0000000000..afd87de085 --- /dev/null +++ b/smoke/run_override_test.cmake @@ -0,0 +1,108 @@ +cmake_minimum_required(VERSION 3.20) + +if(NOT DEFINED SMOKE_EXE) + message(FATAL_ERROR "SMOKE_EXE is required") +endif() +if(NOT DEFINED OVERRIDE_DIR) + message(FATAL_ERROR "OVERRIDE_DIR is required") +endif() +if(NOT EXISTS "${SMOKE_EXE}") + message(FATAL_ERROR "SMOKE_EXE not found: ${SMOKE_EXE}") +endif() + +function(find_module_file OUT_VAR DIR_PATH MODULE_BASENAME REQUIRED_FLAG) + if(NOT EXISTS "${DIR_PATH}") + 
if(REQUIRED_FLAG) + message(FATAL_ERROR "Runtime directory not found: ${DIR_PATH}") + endif() + set(${OUT_VAR} "" PARENT_SCOPE) + return() + endif() + + set(_candidates "${MODULE_BASENAME}") + if(WIN32) + list(APPEND _candidates "${MODULE_BASENAME}.dll") + elseif(APPLE) + list(APPEND _candidates "${MODULE_BASENAME}.dylib" "lib${MODULE_BASENAME}.dylib") + else() + list(APPEND _candidates "${MODULE_BASENAME}.so" "lib${MODULE_BASENAME}.so") + endif() + + foreach(_name IN LISTS _candidates) + set(_candidate_path "${DIR_PATH}/${_name}") + if(EXISTS "${_candidate_path}" AND NOT IS_DIRECTORY "${_candidate_path}") + set(${OUT_VAR} "${_candidate_path}" PARENT_SCOPE) + return() + endif() + endforeach() + + if(NOT WIN32 AND NOT APPLE) + file(GLOB _versioned_candidates + "${DIR_PATH}/${MODULE_BASENAME}.so.*" + "${DIR_PATH}/lib${MODULE_BASENAME}.so.*" + ) + list(LENGTH _versioned_candidates _versioned_count) + if(_versioned_count GREATER 0) + list(GET _versioned_candidates 0 _versioned_first) + set(${OUT_VAR} "${_versioned_first}" PARENT_SCOPE) + return() + endif() + endif() + + if(REQUIRED_FLAG) + message(FATAL_ERROR "Could not find module ${MODULE_BASENAME} in ${DIR_PATH}") + endif() + + set(${OUT_VAR} "" PARENT_SCOPE) +endfunction() + +file(MAKE_DIRECTORY "${OVERRIDE_DIR}") + +if(DEFINED DXC_RUNTIME_DIR AND DEFINED DXC_MODULE_BASENAME AND NOT "${DXC_MODULE_BASENAME}" STREQUAL "") + find_module_file(_dxc_module_file "${DXC_RUNTIME_DIR}" "${DXC_MODULE_BASENAME}" TRUE) +else() + message(FATAL_ERROR "DXC_RUNTIME_DIR and DXC_MODULE_BASENAME are required") +endif() + +if(DEFINED NABLA_RUNTIME_DIR AND EXISTS "${NABLA_RUNTIME_DIR}") + if(DEFINED NABLA_MODULE_BASENAME AND NOT "${NABLA_MODULE_BASENAME}" STREQUAL "") + find_module_file(_nabla_module_file "${NABLA_RUNTIME_DIR}" "${NABLA_MODULE_BASENAME}" FALSE) + set(_nabla_modules "${_nabla_module_file}") + else() + if(WIN32) + file(GLOB _nabla_modules "${NABLA_RUNTIME_DIR}/Nabla*.dll") + elseif(APPLE) + file(GLOB _nabla_modules 
"${NABLA_RUNTIME_DIR}/Nabla*.dylib" "${NABLA_RUNTIME_DIR}/libNabla*.dylib") + else() + file(GLOB _nabla_modules "${NABLA_RUNTIME_DIR}/Nabla*.so" "${NABLA_RUNTIME_DIR}/Nabla*.so.*" "${NABLA_RUNTIME_DIR}/libNabla*.so" "${NABLA_RUNTIME_DIR}/libNabla*.so.*") + endif() + endif() + + foreach(_nabla_module IN LISTS _nabla_modules) + if(NOT "${_nabla_module}" STREQUAL "") + execute_process( + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${_nabla_module}" "${OVERRIDE_DIR}" + RESULT_VARIABLE _copy_nabla_rv + ) + if(NOT _copy_nabla_rv EQUAL 0) + message(FATAL_ERROR "Failed to copy Nabla module from ${_nabla_module} to ${OVERRIDE_DIR}") + endif() + endif() + endforeach() +endif() + +execute_process( + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${_dxc_module_file}" "${OVERRIDE_DIR}" + RESULT_VARIABLE _copy_dxc_rv +) +if(NOT _copy_dxc_rv EQUAL 0) + message(FATAL_ERROR "Failed to copy DXC module from ${_dxc_module_file} to ${OVERRIDE_DIR}") +endif() + +execute_process( + COMMAND "${SMOKE_EXE}" + RESULT_VARIABLE _smoke_rv +) +if(NOT _smoke_rv EQUAL 0) + message(FATAL_ERROR "smoke_override failed with exit code ${_smoke_rv}") +endif() From 0f8b6c1d4294c9cf293105f8091eb0e85654d433 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 14:05:22 +0100 Subject: [PATCH 27/54] Remove smoke override test flow --- smoke/CMakeLists.txt | 27 +-------- smoke/run_override_test.cmake | 108 ---------------------------------- 2 files changed, 1 insertion(+), 134 deletions(-) delete mode 100644 smoke/run_override_test.cmake diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index 34ed41965f..2cd3c28d00 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -30,16 +30,7 @@ add_executable(smoke main.cpp pch.hpp cdb.ps1) target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) - -add_executable(smoke_override main.cpp pch.hpp cdb.ps1) 
-target_link_libraries(smoke_override PRIVATE Nabla::Nabla) -target_compile_definitions(smoke_override PRIVATE - _AFXDLL - "NBL_CPACK_PACKAGE_NABLA_DLL_DIR=\"./Libraries\"" - "NBL_CPACK_PACKAGE_DXC_DLL_DIR=\"./Libraries\"" -) -target_precompile_headers(smoke_override PRIVATE pch.hpp) -set_target_properties(smoke smoke_override PROPERTIES +set_target_properties(smoke PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG "${Nabla_ROOT}/debug/bin" RUNTIME_OUTPUT_DIRECTORY_RELEASE "${Nabla_ROOT}/bin" RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${Nabla_ROOT}/relwithdebinfo/bin" @@ -73,19 +64,3 @@ endif() add_test(NAME NBL_INSTALL_LOAD_API COMMAND ${CMD}) set_tests_properties(NBL_INSTALL_LOAD_API PROPERTIES ENVIRONMENT "${OPTS}") - -set(_SMOKE_CPACK_REL_ENTRY "./$<$,Release>>:$>>") -set(_SMOKE_NABLA_RUNTIME_DIR "${Nabla_ROOT}/${_SMOKE_CPACK_REL_ENTRY}/runtime/nbl") -set(_SMOKE_DXC_RUNTIME_DIR "${_SMOKE_NABLA_RUNTIME_DIR}/3rdparty/dxc") -set(_SMOKE_OVERRIDE_LIB_DIR "$/Libraries") - -add_test(NAME NBL_INSTALL_LOAD_API_OVERRIDE - COMMAND "${CMAKE_COMMAND}" - "-DSMOKE_EXE=$" - "-DNABLA_RUNTIME_DIR=${_SMOKE_NABLA_RUNTIME_DIR}" - "-DDXC_RUNTIME_DIR=${_SMOKE_DXC_RUNTIME_DIR}" - "-DDXC_MODULE_BASENAME=dxcompiler" - "-DOVERRIDE_DIR=${_SMOKE_OVERRIDE_LIB_DIR}" - "-P" "$" -) -set_tests_properties(NBL_INSTALL_LOAD_API_OVERRIDE PROPERTIES ENVIRONMENT "${OPTS}") diff --git a/smoke/run_override_test.cmake b/smoke/run_override_test.cmake deleted file mode 100644 index afd87de085..0000000000 --- a/smoke/run_override_test.cmake +++ /dev/null @@ -1,108 +0,0 @@ -cmake_minimum_required(VERSION 3.20) - -if(NOT DEFINED SMOKE_EXE) - message(FATAL_ERROR "SMOKE_EXE is required") -endif() -if(NOT DEFINED OVERRIDE_DIR) - message(FATAL_ERROR "OVERRIDE_DIR is required") -endif() -if(NOT EXISTS "${SMOKE_EXE}") - message(FATAL_ERROR "SMOKE_EXE not found: ${SMOKE_EXE}") -endif() - -function(find_module_file OUT_VAR DIR_PATH MODULE_BASENAME REQUIRED_FLAG) - if(NOT EXISTS "${DIR_PATH}") - if(REQUIRED_FLAG) - 
message(FATAL_ERROR "Runtime directory not found: ${DIR_PATH}") - endif() - set(${OUT_VAR} "" PARENT_SCOPE) - return() - endif() - - set(_candidates "${MODULE_BASENAME}") - if(WIN32) - list(APPEND _candidates "${MODULE_BASENAME}.dll") - elseif(APPLE) - list(APPEND _candidates "${MODULE_BASENAME}.dylib" "lib${MODULE_BASENAME}.dylib") - else() - list(APPEND _candidates "${MODULE_BASENAME}.so" "lib${MODULE_BASENAME}.so") - endif() - - foreach(_name IN LISTS _candidates) - set(_candidate_path "${DIR_PATH}/${_name}") - if(EXISTS "${_candidate_path}" AND NOT IS_DIRECTORY "${_candidate_path}") - set(${OUT_VAR} "${_candidate_path}" PARENT_SCOPE) - return() - endif() - endforeach() - - if(NOT WIN32 AND NOT APPLE) - file(GLOB _versioned_candidates - "${DIR_PATH}/${MODULE_BASENAME}.so.*" - "${DIR_PATH}/lib${MODULE_BASENAME}.so.*" - ) - list(LENGTH _versioned_candidates _versioned_count) - if(_versioned_count GREATER 0) - list(GET _versioned_candidates 0 _versioned_first) - set(${OUT_VAR} "${_versioned_first}" PARENT_SCOPE) - return() - endif() - endif() - - if(REQUIRED_FLAG) - message(FATAL_ERROR "Could not find module ${MODULE_BASENAME} in ${DIR_PATH}") - endif() - - set(${OUT_VAR} "" PARENT_SCOPE) -endfunction() - -file(MAKE_DIRECTORY "${OVERRIDE_DIR}") - -if(DEFINED DXC_RUNTIME_DIR AND DEFINED DXC_MODULE_BASENAME AND NOT "${DXC_MODULE_BASENAME}" STREQUAL "") - find_module_file(_dxc_module_file "${DXC_RUNTIME_DIR}" "${DXC_MODULE_BASENAME}" TRUE) -else() - message(FATAL_ERROR "DXC_RUNTIME_DIR and DXC_MODULE_BASENAME are required") -endif() - -if(DEFINED NABLA_RUNTIME_DIR AND EXISTS "${NABLA_RUNTIME_DIR}") - if(DEFINED NABLA_MODULE_BASENAME AND NOT "${NABLA_MODULE_BASENAME}" STREQUAL "") - find_module_file(_nabla_module_file "${NABLA_RUNTIME_DIR}" "${NABLA_MODULE_BASENAME}" FALSE) - set(_nabla_modules "${_nabla_module_file}") - else() - if(WIN32) - file(GLOB _nabla_modules "${NABLA_RUNTIME_DIR}/Nabla*.dll") - elseif(APPLE) - file(GLOB _nabla_modules 
"${NABLA_RUNTIME_DIR}/Nabla*.dylib" "${NABLA_RUNTIME_DIR}/libNabla*.dylib") - else() - file(GLOB _nabla_modules "${NABLA_RUNTIME_DIR}/Nabla*.so" "${NABLA_RUNTIME_DIR}/Nabla*.so.*" "${NABLA_RUNTIME_DIR}/libNabla*.so" "${NABLA_RUNTIME_DIR}/libNabla*.so.*") - endif() - endif() - - foreach(_nabla_module IN LISTS _nabla_modules) - if(NOT "${_nabla_module}" STREQUAL "") - execute_process( - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${_nabla_module}" "${OVERRIDE_DIR}" - RESULT_VARIABLE _copy_nabla_rv - ) - if(NOT _copy_nabla_rv EQUAL 0) - message(FATAL_ERROR "Failed to copy Nabla module from ${_nabla_module} to ${OVERRIDE_DIR}") - endif() - endif() - endforeach() -endif() - -execute_process( - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${_dxc_module_file}" "${OVERRIDE_DIR}" - RESULT_VARIABLE _copy_dxc_rv -) -if(NOT _copy_dxc_rv EQUAL 0) - message(FATAL_ERROR "Failed to copy DXC module from ${_dxc_module_file} to ${OVERRIDE_DIR}") -endif() - -execute_process( - COMMAND "${SMOKE_EXE}" - RESULT_VARIABLE _smoke_rv -) -if(NOT _smoke_rv EQUAL 0) - message(FATAL_ERROR "smoke_override failed with exit code ${_smoke_rv}") -endif() From 35f348bb7cb7b75cb0abc221769f71b8f79b107e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 16:15:54 +0100 Subject: [PATCH 28/54] Add one call runtime lookup flow for package consumers --- .github/workflows/build-nabla.yml | 9 +++++ cmake/NablaConfig.cmake.in | 49 +++++++++++++++++++++++- include/nbl/system/RuntimeModuleLookup.h | 4 ++ smoke/CMakeLists.txt | 17 +++++--- src/nbl/CMakeLists.txt | 7 ++++ 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index 704040514e..1e2262a889 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -393,3 +393,12 @@ jobs: - name: CTest Smoke run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ 
matrix.config }} + + - name: Configure Smoke Custom Lookup + run: cmake -S smoke -B smoke/out -D NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON + + - name: Build Smoke Custom Lookup + run: cmake --build smoke/out --config ${{ matrix.config }} + + - name: CTest Smoke Custom Lookup + run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index 44b2a1abcb..6302c1fa16 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -15,6 +15,53 @@ endif() include("${CMAKE_CURRENT_LIST_DIR}/NablaExportTargets.cmake") check_required_components(Nabla) +# Config mapping note: +# Runtime copy sources are resolved from $. +# CMake applies imported-config mapping (MAP_IMPORTED_CONFIG_*) to this expression. +# If a consumer overrides mapping on Nabla::Nabla, do it before this call. +function(nabla_enable_custom_install_lookup _TARGET) + if(NOT TARGET "${_TARGET}") + message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") + endif() + + set(_nbl_runtime_subdir "your_custom_package") + set(_nbl_libraries_subdir "Libraries") + + set(_nbl_options "") + set(_nbl_one_value_args RUNTIME_SUBDIR LIBRARIES_SUBDIR) + set(_nbl_multi_value_args "") + cmake_parse_arguments(_NBL_CUSTOM "${_nbl_options}" "${_nbl_one_value_args}" "${_nbl_multi_value_args}" ${ARGN}) + + if(_NBL_CUSTOM_RUNTIME_SUBDIR) + set(_nbl_runtime_subdir "${_NBL_CUSTOM_RUNTIME_SUBDIR}") + endif() + if(_NBL_CUSTOM_LIBRARIES_SUBDIR) + set(_nbl_libraries_subdir "${_NBL_CUSTOM_LIBRARIES_SUBDIR}") + endif() + + get_property(_nbl_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) + if(_nbl_is_multi_config) + foreach(_nbl_cfg IN LISTS CMAKE_CONFIGURATION_TYPES) + string(TOUPPER "${_nbl_cfg}" _nbl_cfg_upper) + set_property(TARGET "${_TARGET}" PROPERTY "RUNTIME_OUTPUT_DIRECTORY_${_nbl_cfg_upper}" "${CMAKE_CURRENT_BINARY_DIR}/${_nbl_cfg}/${_nbl_runtime_subdir}") + 
endforeach() + else() + set_target_properties("${_TARGET}" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${_nbl_runtime_subdir}") + endif() + + target_compile_definitions("${_TARGET}" PRIVATE + NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_nbl_libraries_subdir}" + NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_nbl_libraries_subdir}" + ) + + add_custom_command(TARGET "${_TARGET}" POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory "$/${_nbl_libraries_subdir}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$/${_nbl_libraries_subdir}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "$/${_nbl_libraries_subdir}" + VERBATIM + ) +endfunction() + if(NABLA_FIND_PACKAGE_VERBOSE) message(STATUS "\n-- Nabla_ROOT = ${Nabla_ROOT}" @@ -29,4 +76,4 @@ if(NABLA_FIND_PACKAGE_VERBOSE) "-- Nabla's DXC module git info:" "\n${_nabla_dxc_git_info_raw}" ) -endif() \ No newline at end of file +endif() diff --git a/include/nbl/system/RuntimeModuleLookup.h b/include/nbl/system/RuntimeModuleLookup.h index 8268cd7e55..616242524c 100644 --- a/include/nbl/system/RuntimeModuleLookup.h +++ b/include/nbl/system/RuntimeModuleLookup.h @@ -46,9 +46,13 @@ struct RuntimeModuleLookup final #endif #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; + #elif defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_DEFAULT) + nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_DEFAULT; #endif #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR; + #elif defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_DEFAULT) + dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR_DEFAULT; #endif #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) nabla.runtimeAbsKey = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index 2cd3c28d00..42b5e8f262 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -30,11 +30,18 @@ add_executable(smoke main.cpp pch.hpp cdb.ps1) 
target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) -set_target_properties(smoke PROPERTIES - RUNTIME_OUTPUT_DIRECTORY_DEBUG "${Nabla_ROOT}/debug/bin" - RUNTIME_OUTPUT_DIRECTORY_RELEASE "${Nabla_ROOT}/bin" - RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO "${Nabla_ROOT}/relwithdebinfo/bin" -) + +option(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP "Use custom install lookup layout for smoke runtime modules" OFF) +if(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP) + if(NOT COMMAND nabla_enable_custom_install_lookup) + message(FATAL_ERROR "Nabla package does not expose nabla_enable_custom_install_lookup") + endif() + + nabla_enable_custom_install_lookup(smoke + RUNTIME_SUBDIR "your_custom_package" + LIBRARIES_SUBDIR "Libraries" + ) +endif() set(CMAKE_CTEST_ARGUMENTS --verbose) enable_testing() diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 39d74994da..e816e87e43 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -866,10 +866,17 @@ nbl_install_dir_spec(../../include/nbl/application_templates nbl) # note: order important, keep after install rules due to NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH property get_property(_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_ GLOBAL PROPERTY NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH) get_target_property(_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_ Nabla NBL_PACKAGE_RUNTIME_DLL_DIR_PATH) +set(_NBL_NABLA_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_ + "$>,$,$>>>" +) +set(_NBL_NABLA_RUNTIME_DLL_DIR_PATH_FROM_CONSUMER_EXE_GE_ + "$,${_NBL_NABLA_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_},$>>" +) target_compile_definitions(Nabla INTERFACE NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY="${_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_}" INTERFACE NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY="${_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_}" + INTERFACE "$" ) NBL_ADJUST_FOLDERS(src) From b5bc065ecff074d02a681483724d1a1f9c7dd262 Mon Sep 17 00:00:00 2001 From: Arkadiusz 
Lachowicz Date: Sat, 21 Feb 2026 17:58:04 +0100 Subject: [PATCH 29/54] Add smoke install selftest and runtime module install rules --- .github/workflows/build-nabla.yml | 12 +++ cmake/NablaConfig.cmake.in | 110 ++++++++++++++++++++------- smoke/CMakeLists.txt | 65 +++++++++++----- smoke/CTestTestfile.install.cmake.in | 10 +++ smoke/NablaSmokeTests.cmake | 39 ++++++++++ 5 files changed, 188 insertions(+), 48 deletions(-) create mode 100644 smoke/CTestTestfile.install.cmake.in create mode 100644 smoke/NablaSmokeTests.cmake diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index 1e2262a889..e0b5a89fdc 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -402,3 +402,15 @@ jobs: - name: CTest Smoke Custom Lookup run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} + + - name: Configure Smoke Installed Runtime Test + run: cmake -S smoke -B smoke/out -D NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON -D NBL_SMOKE_INSTALL_SELFTEST=ON + + - name: Build Smoke Installed Runtime Test + run: cmake --build smoke/out --config ${{ matrix.config }} + + - name: Install Smoke Installed Runtime Test + run: cmake --install smoke/out --config ${{ matrix.config }} --prefix smoke/out/install + + - name: CTest Smoke Installed Runtime Test + run: ctest --verbose --test-dir smoke/out/install --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index 6302c1fa16..adf1ab9799 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -15,51 +15,107 @@ endif() include("${CMAKE_CURRENT_LIST_DIR}/NablaExportTargets.cmake") check_required_components(Nabla) -# Config mapping note: -# Runtime copy sources are resolved from $. -# CMake applies imported-config mapping (MAP_IMPORTED_CONFIG_*) to this expression. 
-# If a consumer overrides mapping on Nabla::Nabla, do it before this call. -function(nabla_enable_custom_install_lookup _TARGET) +# +# nabla_setup_runtime_modules( [RUNTIME_MODULES_SUBDIR ] [INSTALL_RULES ]) +# +# One-call runtime setup for Nabla and DXC modules. +# This function does not modify output directories of . +# It resolves destinations from the executable location at build time: +# $> +# +# Behavior: +# - Adds runtime lookup defines on : +# NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./" +# NBL_CPACK_PACKAGE_DXC_DLL_DIR="./" +# - Copies runtime modules into: +# $>/ +# - Optionally emits install rules (INSTALL_RULES ON): +# ${CMAKE_INSTALL_BINDIR}/ +# +# Typical usage: +# 1) Consumer follows Nabla package layout and keeps runtime next to the app: +# target_link_libraries(my_app PRIVATE Nabla::Nabla) +# # no extra call required +# +# 2) Consumer uses custom runtime folder, for example: +# /Libraries +# nabla_setup_runtime_modules(my_app RUNTIME_MODULES_SUBDIR "Libraries") +# +# 3) Consumer also installs runtime modules together with app install tree: +# install(TARGETS my_app RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") +# nabla_setup_runtime_modules( +# my_app +# RUNTIME_MODULES_SUBDIR "Libraries" +# INSTALL_RULES ON +# ) +# +# Config mapping: +# - Source module path is resolved from $. +# - Imported-config mapping (MAP_IMPORTED_CONFIG_*) applies automatically. +# - Mapping can be overridden before or after this call in one configure run. +# - If using CMAKE_MAP_IMPORTED_CONFIG_, set it before find_package(Nabla). +# - Install rules place runtime modules under CMAKE_INSTALL_BINDIR and assume app runtime is +# installed there (or with equivalent relative layout). 
+# +function(nabla_setup_runtime_modules _TARGET) if(NOT TARGET "${_TARGET}") message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") endif() - set(_nbl_runtime_subdir "your_custom_package") - set(_nbl_libraries_subdir "Libraries") + set(_nbl_runtime_modules_subdir "Libraries") + set(_nbl_install_rules OFF) set(_nbl_options "") - set(_nbl_one_value_args RUNTIME_SUBDIR LIBRARIES_SUBDIR) + set(_nbl_one_value_args RUNTIME_MODULES_SUBDIR INSTALL_RULES) set(_nbl_multi_value_args "") cmake_parse_arguments(_NBL_CUSTOM "${_nbl_options}" "${_nbl_one_value_args}" "${_nbl_multi_value_args}" ${ARGN}) - if(_NBL_CUSTOM_RUNTIME_SUBDIR) - set(_nbl_runtime_subdir "${_NBL_CUSTOM_RUNTIME_SUBDIR}") - endif() - if(_NBL_CUSTOM_LIBRARIES_SUBDIR) - set(_nbl_libraries_subdir "${_NBL_CUSTOM_LIBRARIES_SUBDIR}") + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") endif() - - get_property(_nbl_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) - if(_nbl_is_multi_config) - foreach(_nbl_cfg IN LISTS CMAKE_CONFIGURATION_TYPES) - string(TOUPPER "${_nbl_cfg}" _nbl_cfg_upper) - set_property(TARGET "${_TARGET}" PROPERTY "RUNTIME_OUTPUT_DIRECTORY_${_nbl_cfg_upper}" "${CMAKE_CURRENT_BINARY_DIR}/${_nbl_cfg}/${_nbl_runtime_subdir}") - endforeach() - else() - set_target_properties("${_TARGET}" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${_nbl_runtime_subdir}") + if(DEFINED _NBL_CUSTOM_INSTALL_RULES) + set(_nbl_install_rules "${_NBL_CUSTOM_INSTALL_RULES}") endif() target_compile_definitions("${_TARGET}" PRIVATE - NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_nbl_libraries_subdir}" - NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_nbl_libraries_subdir}" + NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_nbl_runtime_modules_subdir}" + NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_nbl_runtime_modules_subdir}" ) add_custom_command(TARGET "${_TARGET}" POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory "$/${_nbl_libraries_subdir}" - COMMAND 
${CMAKE_COMMAND} -E copy_if_different "$" "$/${_nbl_libraries_subdir}/" - COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "$/${_nbl_libraries_subdir}" + COMMAND ${CMAKE_COMMAND} -E make_directory "$/${_nbl_runtime_modules_subdir}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$/${_nbl_runtime_modules_subdir}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "$/${_nbl_runtime_modules_subdir}" VERBATIM ) + + if(_nbl_install_rules) + if(NOT DEFINED CMAKE_INSTALL_BINDIR) + include(GNUInstallDirs) + endif() + + set(_nbl_install_modules_dest "${CMAKE_INSTALL_BINDIR}/${_nbl_runtime_modules_subdir}") + string(MD5 _nbl_install_modules_dest_key "${_nbl_install_modules_dest}") + get_property(_nbl_install_rules_added GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}") + if(NOT _nbl_install_rules_added) + install(FILES "$" + DESTINATION "${_nbl_install_modules_dest}" + ) + install(DIRECTORY "$,3rdparty,dxc>/" + DESTINATION "${_nbl_install_modules_dest}" + ) + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}" TRUE) + endif() + endif() +endfunction() + +# Backward-compatible alias. Keep temporarily for external consumers. +function(nabla_enable_custom_install_lookup _TARGET) + message(DEPRECATION + "nabla_enable_custom_install_lookup is deprecated. " + "Use nabla_setup_runtime_modules instead." 
+ ) + nabla_setup_runtime_modules("${_TARGET}" ${ARGN}) endfunction() if(NABLA_FIND_PACKAGE_VERBOSE) diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index 42b5e8f262..a6f05f9795 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -18,6 +18,7 @@ add_compile_options( set(CMAKE_SYSTEM_VERSION 10.0) project(NablaSmoke CXX) +include(${CMAKE_CURRENT_LIST_DIR}/NablaSmokeTests.cmake) # default hint for our CI, normally it needs to be path to package's directory where all autogen config .cmake scripts are set(PACKAGE_CONFIG_SEARCH_PATHS ${CMAKE_CURRENT_LIST_DIR}/build-ct/install/cmake ${PACKAGE_CONFIG_SEARCH_PATH_HINTS}) @@ -32,42 +33,64 @@ target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) option(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP "Use custom install lookup layout for smoke runtime modules" OFF) +option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" OFF) +if(NBL_SMOKE_INSTALL_SELFTEST AND NOT NBL_SMOKE_CUSTOM_INSTALL_LOOKUP) + message(FATAL_ERROR "NBL_SMOKE_INSTALL_SELFTEST requires NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON") +endif() if(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP) - if(NOT COMMAND nabla_enable_custom_install_lookup) - message(FATAL_ERROR "Nabla package does not expose nabla_enable_custom_install_lookup") + set(_nbl_smoke_runtime_modules_setup_args + RUNTIME_MODULES_SUBDIR "Libraries" + ) + if(NBL_SMOKE_INSTALL_SELFTEST) + list(APPEND _nbl_smoke_runtime_modules_setup_args INSTALL_RULES ON) endif() - nabla_enable_custom_install_lookup(smoke - RUNTIME_SUBDIR "your_custom_package" - LIBRARIES_SUBDIR "Libraries" + nabla_setup_runtime_modules(smoke + ${_nbl_smoke_runtime_modules_setup_args} ) endif() set(CMAKE_CTEST_ARGUMENTS --verbose) enable_testing() -set(OPTS +set(NBL_SMOKE_TEST_ENVIRONMENT NBL_EXPLICIT_MODULE_LOAD_LOG=1 NBL_EXPLICIT_MODULE_REQUEST_LOG=1 ) option(ENABLE_CRASH_HANDLER "Enable crash handler" ON) -if(WIN32) - if(ENABLE_CRASH_HANDLER) - set(CMD - 
powershell -NoProfile -ExecutionPolicy Bypass - -File "$" - -Exe "$" - ) - endif() -endif() +nabla_smoke_add_install_load_api_test( + TEST_NAME NBL_INSTALL_LOAD_API + EXE_PATH "$" + CRASH_HANDLER_SCRIPT "$" + ENABLE_CRASH_HANDLER ${ENABLE_CRASH_HANDLER} + ENVIRONMENT "${NBL_SMOKE_TEST_ENVIRONMENT}" +) + +if(NBL_SMOKE_INSTALL_SELFTEST) + include(GNUInstallDirs) -if(NOT ENABLE_CRASH_HANDLER) - set(CMD - "$" + install(TARGETS smoke + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + install(FILES "${CMAKE_CURRENT_LIST_DIR}/cdb.ps1" + DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + + set(_nbl_smoke_install_cmake_dir "${CMAKE_INSTALL_DATADIR}/nabla-smoke") + install(FILES "${CMAKE_CURRENT_LIST_DIR}/NablaSmokeTests.cmake" + DESTINATION "${_nbl_smoke_install_cmake_dir}" ) -endif() -add_test(NAME NBL_INSTALL_LOAD_API COMMAND ${CMD}) -set_tests_properties(NBL_INSTALL_LOAD_API PROPERTIES ENVIRONMENT "${OPTS}") + set(NBL_SMOKE_INSTALL_CMAKE_DIR "${_nbl_smoke_install_cmake_dir}") + configure_file( + "${CMAKE_CURRENT_LIST_DIR}/CTestTestfile.install.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.install.cmake" + @ONLY + ) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.install.cmake" + DESTINATION "." 
+ RENAME "CTestTestfile.cmake" + ) +endif() diff --git a/smoke/CTestTestfile.install.cmake.in b/smoke/CTestTestfile.install.cmake.in new file mode 100644 index 0000000000..c5059ce5d8 --- /dev/null +++ b/smoke/CTestTestfile.install.cmake.in @@ -0,0 +1,10 @@ +include("@NBL_SMOKE_INSTALL_CMAKE_DIR@/NablaSmokeTests.cmake") + +nabla_smoke_add_install_load_api_test( + TEST_NAME NBL_INSTALL_LOAD_API + EXE_PATH "@CMAKE_INSTALL_BINDIR@/smoke@CMAKE_EXECUTABLE_SUFFIX@" + CRASH_HANDLER_SCRIPT "@CMAKE_INSTALL_BINDIR@/cdb.ps1" + ENABLE_CRASH_HANDLER @ENABLE_CRASH_HANDLER@ + LEGACY_CTEST_MODE + ENVIRONMENT "@NBL_SMOKE_TEST_ENVIRONMENT@" +) diff --git a/smoke/NablaSmokeTests.cmake b/smoke/NablaSmokeTests.cmake new file mode 100644 index 0000000000..9e2b796ff7 --- /dev/null +++ b/smoke/NablaSmokeTests.cmake @@ -0,0 +1,39 @@ +function(nabla_smoke_add_install_load_api_test) + set(_nbl_smoke_options LEGACY_CTEST_MODE) + set(_nbl_smoke_one_value_args TEST_NAME EXE_PATH CRASH_HANDLER_SCRIPT ENABLE_CRASH_HANDLER) + set(_nbl_smoke_multi_value_args ENVIRONMENT) + cmake_parse_arguments(_NBL_SMOKE "${_nbl_smoke_options}" "${_nbl_smoke_one_value_args}" "${_nbl_smoke_multi_value_args}" ${ARGN}) + + if(NOT _NBL_SMOKE_TEST_NAME) + message(FATAL_ERROR "nabla_smoke_add_install_load_api_test requires TEST_NAME") + endif() + if(NOT _NBL_SMOKE_EXE_PATH) + message(FATAL_ERROR "nabla_smoke_add_install_load_api_test requires EXE_PATH") + endif() + + if(WIN32 AND _NBL_SMOKE_ENABLE_CRASH_HANDLER) + if(_NBL_SMOKE_LEGACY_CTEST_MODE) + add_test("${_NBL_SMOKE_TEST_NAME}" + powershell -NoProfile -ExecutionPolicy Bypass + -File "${_NBL_SMOKE_CRASH_HANDLER_SCRIPT}" + -Exe "${_NBL_SMOKE_EXE_PATH}" + ) + else() + add_test(NAME "${_NBL_SMOKE_TEST_NAME}" COMMAND + powershell -NoProfile -ExecutionPolicy Bypass + -File "${_NBL_SMOKE_CRASH_HANDLER_SCRIPT}" + -Exe "${_NBL_SMOKE_EXE_PATH}" + ) + endif() + else() + if(_NBL_SMOKE_LEGACY_CTEST_MODE) + add_test("${_NBL_SMOKE_TEST_NAME}" "${_NBL_SMOKE_EXE_PATH}") + else() + 
add_test(NAME "${_NBL_SMOKE_TEST_NAME}" COMMAND "${_NBL_SMOKE_EXE_PATH}") + endif() + endif() + + if(_NBL_SMOKE_ENVIRONMENT) + set_tests_properties("${_NBL_SMOKE_TEST_NAME}" PROPERTIES ENVIRONMENT "${_NBL_SMOKE_ENVIRONMENT}") + endif() +endfunction() From 61e250bbce290fc0e53bd64bbe11e517506e132a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sat, 21 Feb 2026 20:36:42 +0100 Subject: [PATCH 30/54] Consolidate smoke runtime flows and CI execution --- .github/workflows/build-nabla.yml | 32 +---- cmake/NablaConfig.cmake.in | 114 ++++++++--------- docs/consume/README.md | 142 +++++++++++++++++++++ include/nbl/system/IApplicationFramework.h | 2 +- include/nbl/system/RuntimeModuleLookup.h | 59 +++++++-- smoke/CMakeLists.txt | 27 ++-- smoke/RunSmokeFlow.cmake | 88 +++++++++++++ src/nbl/CMakeLists.txt | 24 +++- 8 files changed, 376 insertions(+), 112 deletions(-) create mode 100644 docs/consume/README.md create mode 100644 smoke/RunSmokeFlow.cmake diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index e0b5a89fdc..47a0ac8fc0 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -385,32 +385,8 @@ jobs: if (-not (Test-Path "smoke/build-ct/install")) { throw "smoke/build-ct/install not found" } tree.com smoke /F - - name: Configure Smoke - run: cmake -S smoke -B smoke/out + - name: Smoke Flow NO_BUILD_COPY + run: cmake -D FLOW=NO_BUILD_COPY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake - - name: Build Smoke - run: cmake --build smoke/out --config ${{ matrix.config }} - - - name: CTest Smoke - run: ctest --verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} - - - name: Configure Smoke Custom Lookup - run: cmake -S smoke -B smoke/out -D NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON - - - name: Build Smoke Custom Lookup - run: cmake --build smoke/out --config ${{ matrix.config }} - - - name: CTest Smoke Custom Lookup - run: ctest 
--verbose --test-dir smoke/out --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} - - - name: Configure Smoke Installed Runtime Test - run: cmake -S smoke -B smoke/out -D NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON -D NBL_SMOKE_INSTALL_SELFTEST=ON - - - name: Build Smoke Installed Runtime Test - run: cmake --build smoke/out --config ${{ matrix.config }} - - - name: Install Smoke Installed Runtime Test - run: cmake --install smoke/out --config ${{ matrix.config }} --prefix smoke/out/install - - - name: CTest Smoke Installed Runtime Test - run: ctest --verbose --test-dir smoke/out/install --force-new-ctest-process --output-on-failure --no-tests=error -C ${{ matrix.config }} + - name: Smoke Flow WITH_BUILD_COPY + run: cmake -D FLOW=WITH_BUILD_COPY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index adf1ab9799..d1b6d836e0 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -17,46 +17,74 @@ check_required_components(Nabla) # # nabla_setup_runtime_modules( [RUNTIME_MODULES_SUBDIR ] [INSTALL_RULES ]) +# nabla_setup_runtime_install_modules( [RUNTIME_MODULES_SUBDIR ]) # -# One-call runtime setup for Nabla and DXC modules. -# This function does not modify output directories of . -# It resolves destinations from the executable location at build time: -# $> +# Runtime setup helpers for Nabla and DXC modules. 
# -# Behavior: +# Common behavior: # - Adds runtime lookup defines on : # NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./" # NBL_CPACK_PACKAGE_DXC_DLL_DIR="./" -# - Copies runtime modules into: -# $>/ -# - Optionally emits install rules (INSTALL_RULES ON): -# ${CMAKE_INSTALL_BINDIR}/ -# -# Typical usage: -# 1) Consumer follows Nabla package layout and keeps runtime next to the app: -# target_link_libraries(my_app PRIVATE Nabla::Nabla) -# # no extra call required # -# 2) Consumer uses custom runtime folder, for example: -# /Libraries -# nabla_setup_runtime_modules(my_app RUNTIME_MODULES_SUBDIR "Libraries") +# nabla_setup_runtime_modules: +# - Adds build-time copy into: +# $>/ +# - Optionally installs runtime modules when INSTALL_RULES is ON. # -# 3) Consumer also installs runtime modules together with app install tree: -# install(TARGETS my_app RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") -# nabla_setup_runtime_modules( -# my_app -# RUNTIME_MODULES_SUBDIR "Libraries" -# INSTALL_RULES ON -# ) +# nabla_setup_runtime_install_modules: +# - Adds install rules only (no build-time copy): +# ${CMAKE_INSTALL_BINDIR}/ # # Config mapping: # - Source module path is resolved from $. # - Imported-config mapping (MAP_IMPORTED_CONFIG_*) applies automatically. -# - Mapping can be overridden before or after this call in one configure run. # - If using CMAKE_MAP_IMPORTED_CONFIG_, set it before find_package(Nabla). -# - Install rules place runtime modules under CMAKE_INSTALL_BINDIR and assume app runtime is -# installed there (or with equivalent relative layout). 
# +function(_nbl_runtime_modules_apply_lookup_definitions _TARGET _RUNTIME_MODULES_SUBDIR) + target_compile_definitions("${_TARGET}" PRIVATE + NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_RUNTIME_MODULES_SUBDIR}" + NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_RUNTIME_MODULES_SUBDIR}" + ) +endfunction() + +function(_nbl_runtime_modules_add_install_rules _RUNTIME_MODULES_SUBDIR) + if(NOT DEFINED CMAKE_INSTALL_BINDIR) + include(GNUInstallDirs) + endif() + + set(_nbl_install_modules_dest "${CMAKE_INSTALL_BINDIR}/${_RUNTIME_MODULES_SUBDIR}") + string(MD5 _nbl_install_modules_dest_key "${_nbl_install_modules_dest}") + get_property(_nbl_install_rules_added GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}") + if(NOT _nbl_install_rules_added) + install(FILES "$" + DESTINATION "${_nbl_install_modules_dest}" + ) + install(DIRECTORY "$,3rdparty,dxc>/" + DESTINATION "${_nbl_install_modules_dest}" + ) + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}" TRUE) + endif() +endfunction() + +function(nabla_setup_runtime_install_modules _TARGET) + if(NOT TARGET "${_TARGET}") + message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") + endif() + + set(_nbl_runtime_modules_subdir "Libraries") + set(_nbl_options "") + set(_nbl_one_value_args RUNTIME_MODULES_SUBDIR) + set(_nbl_multi_value_args "") + cmake_parse_arguments(_NBL_CUSTOM "${_nbl_options}" "${_nbl_one_value_args}" "${_nbl_multi_value_args}" ${ARGN}) + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + + _nbl_runtime_modules_apply_lookup_definitions("${_TARGET}" "${_nbl_runtime_modules_subdir}") + _nbl_runtime_modules_add_install_rules("${_nbl_runtime_modules_subdir}") +endfunction() + function(nabla_setup_runtime_modules _TARGET) if(NOT TARGET "${_TARGET}") message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") @@ -64,7 +92,6 @@ function(nabla_setup_runtime_modules 
_TARGET) set(_nbl_runtime_modules_subdir "Libraries") set(_nbl_install_rules OFF) - set(_nbl_options "") set(_nbl_one_value_args RUNTIME_MODULES_SUBDIR INSTALL_RULES) set(_nbl_multi_value_args "") @@ -77,10 +104,7 @@ function(nabla_setup_runtime_modules _TARGET) set(_nbl_install_rules "${_NBL_CUSTOM_INSTALL_RULES}") endif() - target_compile_definitions("${_TARGET}" PRIVATE - NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_nbl_runtime_modules_subdir}" - NBL_CPACK_PACKAGE_DXC_DLL_DIR="./${_nbl_runtime_modules_subdir}" - ) + _nbl_runtime_modules_apply_lookup_definitions("${_TARGET}" "${_nbl_runtime_modules_subdir}") add_custom_command(TARGET "${_TARGET}" POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory "$/${_nbl_runtime_modules_subdir}" @@ -90,34 +114,10 @@ function(nabla_setup_runtime_modules _TARGET) ) if(_nbl_install_rules) - if(NOT DEFINED CMAKE_INSTALL_BINDIR) - include(GNUInstallDirs) - endif() - - set(_nbl_install_modules_dest "${CMAKE_INSTALL_BINDIR}/${_nbl_runtime_modules_subdir}") - string(MD5 _nbl_install_modules_dest_key "${_nbl_install_modules_dest}") - get_property(_nbl_install_rules_added GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}") - if(NOT _nbl_install_rules_added) - install(FILES "$" - DESTINATION "${_nbl_install_modules_dest}" - ) - install(DIRECTORY "$,3rdparty,dxc>/" - DESTINATION "${_nbl_install_modules_dest}" - ) - set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_INSTALL_RULES_${_nbl_install_modules_dest_key}" TRUE) - endif() + _nbl_runtime_modules_add_install_rules("${_nbl_runtime_modules_subdir}") endif() endfunction() -# Backward-compatible alias. Keep temporarily for external consumers. -function(nabla_enable_custom_install_lookup _TARGET) - message(DEPRECATION - "nabla_enable_custom_install_lookup is deprecated. " - "Use nabla_setup_runtime_modules instead." 
- ) - nabla_setup_runtime_modules("${_TARGET}" ${ARGN}) -endfunction() - if(NABLA_FIND_PACKAGE_VERBOSE) message(STATUS "\n-- Nabla_ROOT = ${Nabla_ROOT}" diff --git a/docs/consume/README.md b/docs/consume/README.md new file mode 100644 index 0000000000..4b746c6618 --- /dev/null +++ b/docs/consume/README.md @@ -0,0 +1,142 @@ +# Consuming Nabla Package + +This document describes how to consume an installed Nabla package from another CMake project. + +## 1. Package API + +After `find_package(Nabla CONFIG REQUIRED)`, the package provides: + +- imported target `Nabla::Nabla` +- helper `nabla_setup_runtime_modules(...)` +- helper `nabla_setup_runtime_install_modules(...)` + +On shared builds, runtime modules include Nabla and DXC. + +## 2. Locate the package + +You can point CMake to the package with: + +- `-D Nabla_DIR=/cmake` +- `CMAKE_PREFIX_PATH=` + +Minimal baseline: + +```cmake +cmake_minimum_required(VERSION 3.30) +project(MyApp CXX) + +find_package(Nabla REQUIRED CONFIG) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE Nabla::Nabla) +``` + +## 3. Flow NO_BUILD_COPY install to e.g. `./Libraries` + +Use this flow when: + +- build-time should load directly from package +- install tree should load from e.g. `./Libraries` + +Call install-only helper: + +```cmake +include(GNUInstallDirs) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE Nabla::Nabla) + +nabla_setup_runtime_install_modules(my_app + RUNTIME_MODULES_SUBDIR "Libraries" +) + +install(TARGETS my_app + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) +``` + +What it does: + +- adds runtime lookup defines `./Libraries` +- adds install rules for Nabla/DXC runtime modules to `${CMAKE_INSTALL_BINDIR}/Libraries` +- does not add post-build copy + +Runtime behavior: + +- build tree falls back to package runtime if `./Libraries` does not exist and relative package lookup can be resolved +- install tree uses `./Libraries` once modules are installed there + +## 4. 
Flow WITH_BUILD_COPY install to e.g. `./Libraries` + +Use one call when you want both: + +- build-time copy to runtime subdir +- install-time copy to runtime subdir + +```cmake +include(GNUInstallDirs) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE Nabla::Nabla) + +nabla_setup_runtime_modules(my_app + RUNTIME_MODULES_SUBDIR "Libraries" + INSTALL_RULES ON +) + +install(TARGETS my_app + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +) +``` + +## 5. Config mapping + +Runtime source paths are resolved from `$`. + +Imported-config mapping applies automatically. This includes cross-config usage when one consumer config maps to a different imported config. + +If you override mapping: + +- do it in the same configure run +- if using `CMAKE_MAP_IMPORTED_CONFIG_`, set it before `find_package(Nabla)` + +## 6. Troubleshooting + +### `Could not load dxcompiler module` or `Could not load Nabla API` + +Check: + +- helper usage matches your intended flow mode +- `RUNTIME_MODULES_SUBDIR` matches actual runtime folder layout +- install tree actually contains runtime modules under expected subdir + +### Build works but installed app fails + +Most often install rules are missing. + +Use either: + +- `nabla_setup_runtime_install_modules(...)` for `NO_BUILD_COPY` +- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` for `WITH_BUILD_COPY` + +### Build tree cannot resolve package runtime in install-only mode + +This usually means your build tree and package runtime are on different roots or drives so a relative fallback cannot be formed. + +Use one of: + +- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` to copy runtime modules into build tree + +### Why modules are copied in build tree + +Only `nabla_setup_runtime_modules(... INSTALL_RULES ON)` performs build-time copy. + +If you want no build copy, use `nabla_setup_runtime_install_modules(...)` instead. + +## 7. 
Design guidance + +For relocatable consumers: + +- keep lookup relative to executable +- never expose absolute paths in public compile definitions +- use one of the helper flows consistently per target diff --git a/include/nbl/system/IApplicationFramework.h b/include/nbl/system/IApplicationFramework.h index 82da25cfb5..53af9c9b94 100644 --- a/include/nbl/system/IApplicationFramework.h +++ b/include/nbl/system/IApplicationFramework.h @@ -42,7 +42,7 @@ class IApplicationFramework : public core::IReferenceCounted Desired end state is that build outputs follow the same relative runtime layout as install so lookup can stay install-style for both host build and package consumers while still allowing consumer override paths like "./Libraries". - No interface should expose any define that contains an absolute path. + No interface should ever expose any define that contains an absolute path. All binaries must be emitted into the build directory and Nabla should remain fully buildable with a read-only source filesystem. 
diff --git a/include/nbl/system/RuntimeModuleLookup.h b/include/nbl/system/RuntimeModuleLookup.h index 616242524c..dd6ce35e67 100644 --- a/include/nbl/system/RuntimeModuleLookup.h +++ b/include/nbl/system/RuntimeModuleLookup.h @@ -20,6 +20,7 @@ struct RuntimeModuleLookup final std::string_view buildOutputDir = ""; std::string_view buildDllPath = ""; std::string_view installOverrideRel = ""; + std::string_view installBuildFallbackRel = ""; std::string_view runtimeAbsKey = ""; }; @@ -46,13 +47,15 @@ struct RuntimeModuleLookup final #endif #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR) nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR; - #elif defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_DEFAULT) - nabla.installOverrideRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_DEFAULT; #endif #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR) dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR; - #elif defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_DEFAULT) - dxc.installOverrideRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR_DEFAULT; + #endif + #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_BUILD_FALLBACK) + nabla.installBuildFallbackRel = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_BUILD_FALLBACK; + #endif + #if defined(NBL_CPACK_PACKAGE_DXC_DLL_DIR_BUILD_FALLBACK) + dxc.installBuildFallbackRel = NBL_CPACK_PACKAGE_DXC_DLL_DIR_BUILD_FALLBACK; #endif #if defined(NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY) nabla.runtimeAbsKey = NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY; @@ -76,11 +79,18 @@ struct RuntimeModuleLookup final { if (relocatablePackage) { - if (!hasCompleteInstallOverride()) - tryResolveInstallPathsFromPackageLayout(exeDirectory); + if (!hasUsableInstallPaths()) + { + if (!tryResolveInstallPathsFromPackageLayout(exeDirectory)) + tryResolveInstallPathsFromBuildFallbackHints(exeDirectory); + } return true; } - return hasUsableInstallPaths() || tryResolveInstallPathsFromPackageLayout(exeDirectory); + if (hasUsableInstallPaths()) + return true; + if (tryResolveInstallPathsFromPackageLayout(exeDirectory)) + return true; + 
return tryResolveInstallPathsFromBuildFallbackHints(exeDirectory); } inline void finalizeInstallLookups(bool useInstallLookups) @@ -117,7 +127,12 @@ struct RuntimeModuleLookup final { if (relativePath.empty() || exeDirectory.empty()) return system::path(""); - return std::filesystem::absolute(exeDirectory / system::path(relativePath)); + + const auto relPath = system::path(relativePath); + if (relPath.is_absolute()) + return system::path(""); + + return std::filesystem::absolute(exeDirectory / relPath); } inline bool hasUsableInstallPaths() const @@ -173,9 +188,33 @@ struct RuntimeModuleLookup final return false; } - inline bool hasCompleteInstallOverride() const + inline bool tryResolveInstallPathsFromBuildFallbackHints(const system::path& exeDirectory) { - return sharedBuild ? (hasInstallOverride(nabla) && hasInstallOverride(dxc)) : hasInstallOverride(dxc); + Module candidateNabla = nabla; + Module candidateDxc = dxc; + candidateNabla.paths.install = system::path(""); + candidateDxc.paths.install = system::path(""); + + if (!candidateNabla.installBuildFallbackRel.empty()) + candidateNabla.paths.install = absoluteFromExe(exeDirectory, candidateNabla.installBuildFallbackRel); + if (!candidateDxc.installBuildFallbackRel.empty()) + candidateDxc.paths.install = absoluteFromExe(exeDirectory, candidateDxc.installBuildFallbackRel); + + if (candidateDxc.paths.install.empty() && !candidateNabla.paths.install.empty() && hasRuntimeAbsKey(nabla) && hasRuntimeAbsKey(dxc)) + { + const auto dxcRelToNabla = system::path(dxc.runtimeAbsKey).lexically_relative(system::path(nabla.runtimeAbsKey)); + if (!dxcRelToNabla.empty() && dxcRelToNabla != system::path(".")) + candidateDxc.paths.install = std::filesystem::absolute(candidateNabla.paths.install / dxcRelToNabla); + } + + if (!moduleExistsInDirectory(candidateDxc.paths.install, candidateDxc.name)) + return false; + if (sharedBuild && !moduleExistsInDirectory(candidateNabla.paths.install, candidateNabla.name)) + return false; + + 
nabla.paths.install = candidateNabla.paths.install; + dxc.paths.install = candidateDxc.paths.install; + return true; } #if defined(_NBL_PLATFORM_WINDOWS_) diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index a6f05f9795..8de97448d1 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -32,22 +32,25 @@ target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) -option(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP "Use custom install lookup layout for smoke runtime modules" OFF) -option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" OFF) -if(NBL_SMOKE_INSTALL_SELFTEST AND NOT NBL_SMOKE_CUSTOM_INSTALL_LOOKUP) - message(FATAL_ERROR "NBL_SMOKE_INSTALL_SELFTEST requires NBL_SMOKE_CUSTOM_INSTALL_LOOKUP=ON") -endif() -if(NBL_SMOKE_CUSTOM_INSTALL_LOOKUP) - set(_nbl_smoke_runtime_modules_setup_args +set(NBL_SMOKE_FLOW "NO_BUILD_COPY" CACHE STRING "Smoke runtime flow: NO_BUILD_COPY or WITH_BUILD_COPY") +set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS NO_BUILD_COPY WITH_BUILD_COPY) +string(TOUPPER "${NBL_SMOKE_FLOW}" NBL_SMOKE_FLOW) +message(STATUS "Smoke runtime flow: ${NBL_SMOKE_FLOW}") +option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" ON) + +if(NBL_SMOKE_FLOW STREQUAL "NO_BUILD_COPY") + # No build-time copy, install-time runtime modules in ./Libraries. + nabla_setup_runtime_install_modules(smoke RUNTIME_MODULES_SUBDIR "Libraries" ) - if(NBL_SMOKE_INSTALL_SELFTEST) - list(APPEND _nbl_smoke_runtime_modules_setup_args INSTALL_RULES ON) - endif() - +elseif(NBL_SMOKE_FLOW STREQUAL "WITH_BUILD_COPY") + # Build-time copy + install-time runtime modules in ./Libraries. 
nabla_setup_runtime_modules(smoke - ${_nbl_smoke_runtime_modules_setup_args} + RUNTIME_MODULES_SUBDIR "Libraries" + INSTALL_RULES ON ) +else() + message(FATAL_ERROR "Invalid NBL_SMOKE_FLOW='${NBL_SMOKE_FLOW}'") endif() set(CMAKE_CTEST_ARGUMENTS --verbose) diff --git a/smoke/RunSmokeFlow.cmake b/smoke/RunSmokeFlow.cmake new file mode 100644 index 0000000000..64d8ea0e7d --- /dev/null +++ b/smoke/RunSmokeFlow.cmake @@ -0,0 +1,88 @@ +if(NOT DEFINED FLOW) + message(FATAL_ERROR "FLOW is required. Allowed values: NO_BUILD_COPY, WITH_BUILD_COPY") +endif() + +string(TOUPPER "${FLOW}" FLOW) +if(NOT FLOW MATCHES "^(NO_BUILD_COPY|WITH_BUILD_COPY)$") + message(FATAL_ERROR "Invalid FLOW='${FLOW}'. Allowed values: NO_BUILD_COPY, WITH_BUILD_COPY") +endif() + +if(NOT DEFINED CONFIG) + message(FATAL_ERROR "CONFIG is required (e.g. Debug, Release, RelWithDebInfo)") +endif() + +if(NOT DEFINED SMOKE_SOURCE_DIR) + set(SMOKE_SOURCE_DIR "smoke") +endif() + +if(NOT DEFINED BUILD_DIR) + set(BUILD_DIR "smoke/out") +endif() + +if(NOT DEFINED INSTALL_DIR) + set(INSTALL_DIR "${BUILD_DIR}/install") +endif() + +if(NOT DEFINED CTEST_BIN) + if(DEFINED CMAKE_CTEST_COMMAND) + set(CTEST_BIN "${CMAKE_CTEST_COMMAND}") + else() + find_program(CTEST_BIN ctest REQUIRED) + endif() +endif() + +function(run_cmd) + execute_process( + COMMAND ${ARGV} + COMMAND_ECHO STDOUT + RESULT_VARIABLE _rc + ) + if(NOT _rc EQUAL 0) + message(FATAL_ERROR "Command failed with exit code ${_rc}") + endif() +endfunction() + +file(REMOVE_RECURSE "${BUILD_DIR}") + +run_cmd( + "${CMAKE_COMMAND}" + -S "${SMOKE_SOURCE_DIR}" + -B "${BUILD_DIR}" + -D "NBL_SMOKE_FLOW=${FLOW}" + -D "NBL_SMOKE_INSTALL_SELFTEST=ON" +) + +run_cmd( + "${CMAKE_COMMAND}" + --build "${BUILD_DIR}" + --config "${CONFIG}" +) + +run_cmd( + "${CTEST_BIN}" + --verbose + --test-dir "${BUILD_DIR}" + --force-new-ctest-process + --output-on-failure + --no-tests=error + -C "${CONFIG}" +) + +file(REMOVE_RECURSE "${INSTALL_DIR}") + +run_cmd( + "${CMAKE_COMMAND}" + --install 
"${BUILD_DIR}" + --config "${CONFIG}" + --prefix "${INSTALL_DIR}" +) + +run_cmd( + "${CTEST_BIN}" + --verbose + --test-dir "${INSTALL_DIR}" + --force-new-ctest-process + --output-on-failure + --no-tests=error + -C "${CONFIG}" +) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index e816e87e43..18a25c8619 100644 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -866,17 +866,33 @@ nbl_install_dir_spec(../../include/nbl/application_templates nbl) # note: order important, keep after install rules due to NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH property get_property(_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_ GLOBAL PROPERTY NBL_3RDPARTY_DXC_NS_PACKAGE_RUNTIME_DLL_DIR_PATH) get_target_property(_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_ Nabla NBL_PACKAGE_RUNTIME_DLL_DIR_PATH) +set(_NBL_CONSUMER_BIN_DIR_GE_ + "$,$>>" +) +set(_NBL_NABLA_RUNTIME_DLL_DIR_GE_ + "$>" +) +set(_NBL_DXC_RUNTIME_DLL_DIR_GE_ + "$,3rdparty,dxc>>" +) +set(_NBL_NABLA_RUNTIME_SAME_ROOT_AS_CONSUMER_GE_ + "$,$>" +) +set(_NBL_DXC_RUNTIME_SAME_ROOT_AS_CONSUMER_GE_ + "$,$>" +) set(_NBL_NABLA_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_ - "$>,$,$>>>" + "$,>" ) -set(_NBL_NABLA_RUNTIME_DLL_DIR_PATH_FROM_CONSUMER_EXE_GE_ - "$,${_NBL_NABLA_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_},$>>" +set(_NBL_DXC_RUNTIME_DLL_DIR_PATH_REL_TO_CONSUMER_EXE_GE_ + "$,>" ) target_compile_definitions(Nabla INTERFACE NBL_CPACK_PACKAGE_NABLA_DLL_DIR_ABS_KEY="${_NBL_NABLA_PACKAGE_RUNTIME_DLL_DIR_PATH_}" INTERFACE NBL_CPACK_PACKAGE_DXC_DLL_DIR_ABS_KEY="${_NBL_DXC_PACKAGE_RUNTIME_DLL_DIR_PATH_}" - INTERFACE "$" + INTERFACE "$" + INTERFACE "$" ) NBL_ADJUST_FOLDERS(src) From edd1bec532cc68e832431dc2ed9b589907401666 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Feb 2026 00:23:39 +0100 Subject: [PATCH 31/54] Refine runtime module setup API and smoke flows --- .github/workflows/build-nabla.yml | 8 +- cmake/NablaConfig.cmake.in | 679 ++++++++++++++++++++++++++++-- docs/consume/README.md | 164 
+++++--- smoke/CMakeLists.txt | 19 +- smoke/RunSmokeFlow.cmake | 6 +- 5 files changed, 763 insertions(+), 113 deletions(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index 47a0ac8fc0..fea595428f 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -385,8 +385,8 @@ jobs: if (-not (Test-Path "smoke/build-ct/install")) { throw "smoke/build-ct/install not found" } tree.com smoke /F - - name: Smoke Flow NO_BUILD_COPY - run: cmake -D FLOW=NO_BUILD_COPY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake + - name: Smoke Flow CONFIGURE_ONLY + run: cmake -D FLOW=CONFIGURE_ONLY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake - - name: Smoke Flow WITH_BUILD_COPY - run: cmake -D FLOW=WITH_BUILD_COPY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake + - name: Smoke Flow BUILD_ONLY + run: cmake -D FLOW=BUILD_ONLY -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index d1b6d836e0..24d46834e5 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -16,30 +16,45 @@ include("${CMAKE_CURRENT_LIST_DIR}/NablaExportTargets.cmake") check_required_components(Nabla) # -# nabla_setup_runtime_modules( [RUNTIME_MODULES_SUBDIR ] [INSTALL_RULES ]) -# nabla_setup_runtime_install_modules( [RUNTIME_MODULES_SUBDIR ]) +# nabla_sync_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_ ]... +# [MODE ] +# [RUNTIME_MODULES_SUBDIR ] +# [BUILD_TRIGGER_TARGETS ] +# ) # -# Runtime setup helpers for Nabla and DXC modules. 
+# nabla_apply_runtime_lookup( +# TARGETS +# [RUNTIME_MODULES_SUBDIR ] +# ) # -# Common behavior: -# - Adds runtime lookup defines on : -# NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./" -# NBL_CPACK_PACKAGE_DXC_DLL_DIR="./" +# nabla_setup_runtime_install_modules( +# [RUNTIME_MODULES_SUBDIR ] +# ) # -# nabla_setup_runtime_modules: -# - Adds build-time copy into: -# $>/ -# - Optionally installs runtime modules when INSTALL_RULES is ON. +# nabla_setup_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_ ]... +# [APPLY_LOOKUP_TO_TARGETS ] +# [RUNTIME_MODULES_SUBDIR ] +# [MODE ] +# [INSTALL_RULES ] +# [BUILD_TRIGGER_TARGETS ] +# ) # -# nabla_setup_runtime_install_modules: -# - Adds install rules only (no build-time copy): -# ${CMAKE_INSTALL_BINDIR}/ +# Wrapper around sync + lookup + install helpers. # # Config mapping: -# - Source module path is resolved from $. -# - Imported-config mapping (MAP_IMPORTED_CONFIG_*) applies automatically. +# - Runtime source path is resolved from mapped imported config of Nabla::Nabla. +# - MAP_IMPORTED_CONFIG_* and CMAKE_MAP_IMPORTED_CONFIG_* are applied automatically. +# - MODE=CONFIGURE_TIME and MODE=BOTH resolve mapped imported config during configure/generate. +# - For MODE=CONFIGURE_TIME and MODE=BOTH, finalize mapping before calling helpers. # - If using CMAKE_MAP_IMPORTED_CONFIG_, set it before find_package(Nabla). 
# + function(_nbl_runtime_modules_apply_lookup_definitions _TARGET _RUNTIME_MODULES_SUBDIR) target_compile_definitions("${_TARGET}" PRIVATE NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./${_RUNTIME_MODULES_SUBDIR}" @@ -47,6 +62,18 @@ function(_nbl_runtime_modules_apply_lookup_definitions _TARGET _RUNTIME_MODULES_ ) endfunction() +function(_nbl_runtime_modules_apply_lookup_definitions_to_targets _TARGETS _RUNTIME_MODULES_SUBDIR) + set(_targets ${_TARGETS}) + list(REMOVE_DUPLICATES _targets) + + foreach(_target IN LISTS _targets) + if(NOT TARGET "${_target}") + message(FATAL_ERROR "Nabla: target \"${_target}\" does not exist") + endif() + _nbl_runtime_modules_apply_lookup_definitions("${_target}" "${_RUNTIME_MODULES_SUBDIR}") + endforeach() +endfunction() + function(_nbl_runtime_modules_add_install_rules _RUNTIME_MODULES_SUBDIR) if(NOT DEFINED CMAKE_INSTALL_BINDIR) include(GNUInstallDirs) @@ -66,36 +93,581 @@ function(_nbl_runtime_modules_add_install_rules _RUNTIME_MODULES_SUBDIR) endif() endfunction() -function(nabla_setup_runtime_install_modules _TARGET) +function(_nbl_runtime_modules_collect_consumer_configs _OUT_CONFIGS) + if(CMAKE_CONFIGURATION_TYPES) + set(_consumer_configs ${CMAKE_CONFIGURATION_TYPES}) + elseif(CMAKE_BUILD_TYPE) + set(_consumer_configs "${CMAKE_BUILD_TYPE}") + else() + set(_consumer_configs Debug) + endif() + + list(REMOVE_DUPLICATES _consumer_configs) + set(${_OUT_CONFIGS} ${_consumer_configs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_extract_destination_overrides _OUT_OVERRIDES _OUT_UNKNOWN) + set(_tokens ${ARGN}) + set(_overrides "") + set(_unknown "") + + list(LENGTH _tokens _tokens_len) + math(EXPR _tokens_mod2 "${_tokens_len} % 2") + if(_tokens_mod2) + set(${_OUT_OVERRIDES} "" PARENT_SCOPE) + set(${_OUT_UNKNOWN} "${_tokens}" PARENT_SCOPE) + return() + endif() + + while(TRUE) + list(LENGTH _tokens _tokens_len) + if(_tokens_len EQUAL 0) + break() + endif() + + list(POP_FRONT _tokens _key) + list(POP_FRONT _tokens _value) + 
string(TOUPPER "${_key}" _key_upper) + + if(_key_upper MATCHES "^DESTINATION_[A-Z0-9_]+$") + string(REGEX REPLACE "^DESTINATION_" "" _cfg_upper "${_key_upper}") + list(APPEND _overrides "${_cfg_upper}::${_value}") + else() + list(APPEND _unknown "${_key}" "${_value}") + endif() + endwhile() + + set(${_OUT_OVERRIDES} ${_overrides} PARENT_SCOPE) + set(${_OUT_UNKNOWN} ${_unknown} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_expand_destination_pairs _DESTINATION_DEFAULT _DESTINATION_OVERRIDES _OUT_CFG_DST_PAIRS) + _nbl_runtime_modules_collect_consumer_configs(_consumer_configs) + set(_cfg_dst_pairs "") + + foreach(_consumer_config IN LISTS _consumer_configs) + string(TOUPPER "${_consumer_config}" _cfg_upper) + set(_resolved_destination "") + + foreach(_override IN LISTS _DESTINATION_OVERRIDES) + string(REPLACE "::" ";" _override_parts "${_override}") + list(GET _override_parts 0 _override_cfg_upper) + if(_override_cfg_upper STREQUAL _cfg_upper) + list(GET _override_parts 1 _resolved_destination) + break() + endif() + endforeach() + + if(_resolved_destination STREQUAL "") + set(_resolved_destination "${_DESTINATION_DEFAULT}") + endif() + + if(_resolved_destination STREQUAL "") + message(FATAL_ERROR "Nabla: missing destination for consumer config \"${_consumer_config}\". 
Provide DESTINATION or DESTINATION_${_cfg_upper}.") + endif() + + if(_resolved_destination MATCHES "\\$<") + message(FATAL_ERROR "Nabla: DESTINATION for MODE CONFIGURE_TIME must be a plain path without generator expressions.") + endif() + + cmake_path(IS_ABSOLUTE _resolved_destination _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _resolved_destination BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _resolved_destination) + endif() + + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_resolved_destination}") + endforeach() + + set(${_OUT_CFG_DST_PAIRS} ${_cfg_dst_pairs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_resolve_imported_nabla_file _CONSUMER_CONFIG _OUT_IMPORTED_FILE) + string(TOUPPER "${_CONSUMER_CONFIG}" _cfg_upper) + + # Resolve runtime source from mapped imported config for given consumer config. + set(_mapped_candidates "") + get_target_property(_target_map "Nabla::Nabla" "MAP_IMPORTED_CONFIG_${_cfg_upper}") + if(_target_map AND NOT _target_map STREQUAL "NOTFOUND") + list(APPEND _mapped_candidates ${_target_map}) + endif() + + set(_global_map_var "CMAKE_MAP_IMPORTED_CONFIG_${_cfg_upper}") + if(DEFINED ${_global_map_var} AND NOT "${${_global_map_var}}" STREQUAL "") + list(APPEND _mapped_candidates ${${_global_map_var}}) + endif() + + list(APPEND _mapped_candidates "${_cfg_upper}") + + foreach(_mapped_config IN LISTS _mapped_candidates) + if(_mapped_config STREQUAL "") + get_target_property(_candidate "Nabla::Nabla" IMPORTED_LOCATION) + else() + string(TOUPPER "${_mapped_config}" _mapped_upper) + get_target_property(_candidate "Nabla::Nabla" "IMPORTED_LOCATION_${_mapped_upper}") + endif() + + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + endforeach() + + get_target_property(_imported_configs "Nabla::Nabla" IMPORTED_CONFIGURATIONS) + foreach(_imported_config IN LISTS _imported_configs) + 
get_target_property(_candidate "Nabla::Nabla" "IMPORTED_LOCATION_${_imported_config}") + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + endforeach() + + get_target_property(_candidate "Nabla::Nabla" IMPORTED_LOCATION) + if(_candidate AND NOT _candidate STREQUAL "NOTFOUND" AND EXISTS "${_candidate}") + set(${_OUT_IMPORTED_FILE} "${_candidate}" PARENT_SCOPE) + return() + endif() + + message(FATAL_ERROR "Nabla: cannot resolve imported runtime location for consumer config \"${_CONSUMER_CONFIG}\"") +endfunction() + +function(_nbl_runtime_modules_resolve_dxc_runtime_file _NABLA_IMPORTED_FILE _OUT_DXC_IMPORTED_FILE) + cmake_path(GET _NABLA_IMPORTED_FILE PARENT_PATH _nabla_runtime_dir) + set(_dxc_runtime_file "${_nabla_runtime_dir}/3rdparty/dxc/${CMAKE_SHARED_LIBRARY_PREFIX}dxcompiler${CMAKE_SHARED_LIBRARY_SUFFIX}") + + if(NOT EXISTS "${_dxc_runtime_file}") + message(FATAL_ERROR "Nabla: DXC runtime module not found at \"${_dxc_runtime_file}\"") + endif() + + set(${_OUT_DXC_IMPORTED_FILE} "${_dxc_runtime_file}" PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_expand_target_configure_sync_pairs _TARGET _RUNTIME_MODULES_SUBDIR _OUT_CFG_DST_PAIRS) if(NOT TARGET "${_TARGET}") message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") endif() + get_target_property(_runtime_output_dir "${_TARGET}" RUNTIME_OUTPUT_DIRECTORY) + if(_runtime_output_dir) + set(_runtime_output_base "${_runtime_output_dir}") + elseif(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY AND NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY STREQUAL "") + set(_runtime_output_base "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + else() + if(CMAKE_CONFIGURATION_TYPES) + set(_runtime_output_base "${CMAKE_CURRENT_BINARY_DIR}/$") + else() + set(_runtime_output_base "${CMAKE_CURRENT_BINARY_DIR}") + endif() + endif() + + _nbl_runtime_modules_collect_consumer_configs(_consumer_configs) + set(_cfg_dst_pairs "") + + 
if(_runtime_output_base MATCHES "\\$") + set(_runtime_output_without_config "${_runtime_output_base}") + string(REPLACE "$" "" _runtime_output_without_config "${_runtime_output_without_config}") + if(_runtime_output_without_config MATCHES "\\$<") + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME supports only $ generator expression in runtime output directory") + endif() + + foreach(_consumer_config IN LISTS _consumer_configs) + set(_runtime_output_resolved "${_runtime_output_base}") + string(REPLACE "$" "${_consumer_config}" _runtime_output_resolved "${_runtime_output_resolved}") + cmake_path(IS_ABSOLUTE _runtime_output_resolved _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _runtime_output_resolved BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _runtime_output_resolved) + endif() + set(_runtime_modules_dst "${_runtime_output_resolved}/${_RUNTIME_MODULES_SUBDIR}") + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_runtime_modules_dst}") + endforeach() + else() + if(_runtime_output_base MATCHES "\\$<") + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME supports only plain paths or paths with $ in runtime output directory") + endif() + + list(LENGTH _consumer_configs _consumer_configs_count) + if(_consumer_configs_count GREATER 1) + message(FATAL_ERROR "Nabla: MODE CONFIGURE_TIME with multi-config generators requires $ in runtime output directory") + endif() + + list(GET _consumer_configs 0 _consumer_config) + cmake_path(IS_ABSOLUTE _runtime_output_base _is_abs) + if(NOT _is_abs) + cmake_path(ABSOLUTE_PATH _runtime_output_base BASE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" OUTPUT_VARIABLE _runtime_output_base) + endif() + set(_runtime_modules_dst "${_runtime_output_base}/${_RUNTIME_MODULES_SUBDIR}") + list(APPEND _cfg_dst_pairs "${_consumer_config}::${_runtime_modules_dst}") + endif() + + set(${_OUT_CFG_DST_PAIRS} ${_cfg_dst_pairs} PARENT_SCOPE) +endfunction() + +function(_nbl_runtime_modules_add_configure_sync_rule_for_pairs _CFG_DST_PAIRS 
_ENABLE_CONFIGURE_DEPENDS) + set(_cfg_dst_pairs ${_CFG_DST_PAIRS}) + + foreach(_cfg_dst_pair IN LISTS _cfg_dst_pairs) + string(REPLACE "::" ";" _cfg_dst_parts "${_cfg_dst_pair}") + list(GET _cfg_dst_parts 0 _consumer_config) + list(GET _cfg_dst_parts 1 _runtime_modules_dst) + + string(MD5 _runtime_modules_dst_key "${_runtime_modules_dst}") + get_property(_runtime_modules_config_synced GLOBAL PROPERTY "NBL_RUNTIME_MODULES_CONFIG_SYNC_${_runtime_modules_dst_key}") + if(_runtime_modules_config_synced) + continue() + endif() + + _nbl_runtime_modules_resolve_imported_nabla_file("${_consumer_config}" _nabla_runtime_file) + _nbl_runtime_modules_resolve_dxc_runtime_file("${_nabla_runtime_file}" _dxc_runtime_file) + + file(MAKE_DIRECTORY "${_runtime_modules_dst}") + + cmake_path(GET _nabla_runtime_file FILENAME _nabla_runtime_name) + cmake_path(GET _dxc_runtime_file FILENAME _dxc_runtime_name) + file(COPY_FILE "${_nabla_runtime_file}" "${_runtime_modules_dst}/${_nabla_runtime_name}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + file(COPY_FILE "${_dxc_runtime_file}" "${_runtime_modules_dst}/${_dxc_runtime_name}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + + if(_ENABLE_CONFIGURE_DEPENDS) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS + "${_nabla_runtime_file}" + "${_dxc_runtime_file}" + ) + endif() + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_CONFIG_SYNC_${_runtime_modules_dst_key}" TRUE) + endforeach() +endfunction() + +function(_nbl_runtime_modules_add_configure_sync_rule_for_targets _TARGETS _RUNTIME_MODULES_SUBDIR _ENABLE_CONFIGURE_DEPENDS) + set(_targets ${_TARGETS}) + list(REMOVE_DUPLICATES _targets) + + set(_cfg_dst_pairs "") + foreach(_target IN LISTS _targets) + _nbl_runtime_modules_expand_target_configure_sync_pairs("${_target}" "${_RUNTIME_MODULES_SUBDIR}" _target_cfg_dst_pairs) + list(APPEND _cfg_dst_pairs ${_target_cfg_dst_pairs}) + endforeach() + + _nbl_runtime_modules_add_configure_sync_rule_for_pairs("${_cfg_dst_pairs}" 
"${_ENABLE_CONFIGURE_DEPENDS}") +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_target _TARGET _RUNTIME_MODULES_SUBDIR) + if(NOT TARGET "${_TARGET}") + message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") + endif() + + get_target_property(_nbl_runtime_output_dir "${_TARGET}" RUNTIME_OUTPUT_DIRECTORY) + if(_nbl_runtime_output_dir) + set(_nbl_runtime_modules_dest "$") + elseif(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY AND NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY STREQUAL "") + set(_nbl_runtime_modules_dest "$") + else() + set(_nbl_runtime_modules_dest "$/${_RUNTIME_MODULES_SUBDIR}") + endif() + + string(MD5 _nbl_runtime_modules_dest_key "${_nbl_runtime_modules_dest}") + + get_property(_nbl_runtime_modules_stamp GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_${_nbl_runtime_modules_dest_key}") + if(NOT _nbl_runtime_modules_stamp) + set(_nbl_runtime_modules_stamp "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/nabla_runtime_modules_${_nbl_runtime_modules_dest_key}.stamp") + + add_custom_command( + OUTPUT "${_nbl_runtime_modules_stamp}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_nbl_runtime_modules_dest}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${_nbl_runtime_modules_dest}/" + COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "${_nbl_runtime_modules_dest}" + COMMAND ${CMAKE_COMMAND} -E touch "${_nbl_runtime_modules_stamp}" + DEPENDS + "$" + "$,3rdparty,dxc,${CMAKE_SHARED_LIBRARY_PREFIX}dxcompiler${CMAKE_SHARED_LIBRARY_SUFFIX}>" + "$>" + COMMAND_EXPAND_LISTS + VERBATIM + ) + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_${_nbl_runtime_modules_dest_key}" "${_nbl_runtime_modules_stamp}") + endif() + + set_source_files_properties("${_nbl_runtime_modules_stamp}" PROPERTIES GENERATED TRUE) + target_sources("${_TARGET}" PRIVATE "${_nbl_runtime_modules_stamp}") +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_targets _TARGETS _RUNTIME_MODULES_SUBDIR) + set(_targets ${_TARGETS}) + 
list(REMOVE_DUPLICATES _targets) + + foreach(_target IN LISTS _targets) + _nbl_runtime_modules_add_build_sync_rule_for_target("${_target}" "${_RUNTIME_MODULES_SUBDIR}") + endforeach() +endfunction() + +function(_nbl_runtime_modules_add_build_sync_rule_for_destination_pairs _BUILD_TRIGGER_TARGETS _CFG_DST_PAIRS) + set(_build_trigger_targets ${_BUILD_TRIGGER_TARGETS}) + list(REMOVE_DUPLICATES _build_trigger_targets) + + foreach(_target IN LISTS _build_trigger_targets) + if(NOT TARGET "${_target}") + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS contains unknown target \"${_target}\"") + endif() + endforeach() + + set(_cfg_dst_pairs ${_CFG_DST_PAIRS}) + + foreach(_cfg_dst_pair IN LISTS _cfg_dst_pairs) + string(REPLACE "::" ";" _cfg_dst_parts "${_cfg_dst_pair}") + list(GET _cfg_dst_parts 0 _consumer_config) + list(GET _cfg_dst_parts 1 _runtime_modules_dst) + + _nbl_runtime_modules_resolve_imported_nabla_file("${_consumer_config}" _nabla_runtime_file) + _nbl_runtime_modules_resolve_dxc_runtime_file("${_nabla_runtime_file}" _dxc_runtime_file) + + cmake_path(GET _nabla_runtime_file FILENAME _nabla_runtime_name) + cmake_path(GET _dxc_runtime_file FILENAME _dxc_runtime_name) + set(_nabla_runtime_dst "${_runtime_modules_dst}/${_nabla_runtime_name}") + set(_dxc_runtime_dst "${_runtime_modules_dst}/${_dxc_runtime_name}") + + string(MD5 _runtime_modules_dst_key "${_consumer_config}::${_runtime_modules_dst}") + + get_property(_sync_target GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_DEST_TARGET_${_runtime_modules_dst_key}") + if(NOT _sync_target) + set(_sync_target "nabla_runtime_modules_dest_sync_${_runtime_modules_dst_key}") + set(_sync_stamp "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_sync_target}.stamp") + + add_custom_command( + OUTPUT "${_sync_stamp}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_runtime_modules_dst}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_nabla_runtime_file}" "${_runtime_modules_dst}/" + COMMAND ${CMAKE_COMMAND} -E copy_if_different 
"${_dxc_runtime_file}" "${_runtime_modules_dst}/" + COMMAND ${CMAKE_COMMAND} -E touch "${_sync_stamp}" + DEPENDS + "${_nabla_runtime_file}" + "${_dxc_runtime_file}" + "${_nabla_runtime_dst}" + "${_dxc_runtime_dst}" + VERBATIM + ) + + add_custom_target("${_sync_target}" DEPENDS "${_sync_stamp}") + set_target_properties("${_sync_target}" PROPERTIES + FOLDER "CMakePredefinedTargets" + EXCLUDE_FROM_ALL TRUE + ) + + set_property(GLOBAL PROPERTY "NBL_RUNTIME_MODULES_BUILD_SYNC_DEST_TARGET_${_runtime_modules_dst_key}" "${_sync_target}") + endif() + + foreach(_target IN LISTS _build_trigger_targets) + add_dependencies("${_target}" "${_sync_target}") + endforeach() + endforeach() +endfunction() + +# +# nabla_apply_runtime_lookup( +# TARGETS +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# Applies runtime lookup compile definitions to executable targets. +# The lookup is always relative to executable directory and does not expose +# absolute paths. +# +# Notes: +# - TARGETS is required. +# - RUNTIME_MODULES_SUBDIR defaults to "Libraries". 
+# +function(nabla_apply_runtime_lookup) + set(_nbl_runtime_modules_subdir "Libraries") + + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR" "TARGETS" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_apply_runtime_lookup: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() + + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + + if(NOT _NBL_CUSTOM_TARGETS) + message(FATAL_ERROR "Nabla: nabla_apply_runtime_lookup requires TARGETS ") + endif() + + _nbl_runtime_modules_apply_lookup_definitions_to_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}") +endfunction() + +# +# nabla_setup_runtime_install_modules( +# [RUNTIME_MODULES_SUBDIR ] +# ) +# +# Adds install() rules that copy Nabla and DXC runtime modules into: +# ${CMAKE_INSTALL_BINDIR}/ +# +# Notes: +# - RUNTIME_MODULES_SUBDIR defaults to "Libraries". +# - This helper only adds install rules. 
+# +function(nabla_setup_runtime_install_modules) set(_nbl_runtime_modules_subdir "Libraries") - set(_nbl_options "") - set(_nbl_one_value_args RUNTIME_MODULES_SUBDIR) - set(_nbl_multi_value_args "") - cmake_parse_arguments(_NBL_CUSTOM "${_nbl_options}" "${_nbl_one_value_args}" "${_nbl_multi_value_args}" ${ARGN}) + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR" "" ${ARGV}) + + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_install_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") + endif() if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") endif() - _nbl_runtime_modules_apply_lookup_definitions("${_TARGET}" "${_nbl_runtime_modules_subdir}") _nbl_runtime_modules_add_install_rules("${_nbl_runtime_modules_subdir}") endfunction() -function(nabla_setup_runtime_modules _TARGET) - if(NOT TARGET "${_TARGET}") - message(FATAL_ERROR "Nabla: target \"${_TARGET}\" does not exist") +# +# nabla_sync_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_ ...] +# [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] +# [RUNTIME_MODULES_SUBDIR ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# Synchronizes runtime modules from Nabla package into consumer runtime layout. +# +# Input modes (mutually exclusive): +# - TARGETS mode +# Copies beside each target runtime dir under RUNTIME_MODULES_SUBDIR. +# - DESTINATION mode +# Copies to explicit DESTINATION or DESTINATION_ paths. +# +# MODE: +# - BUILD_TIME +# Copy during build. +# - CONFIGURE_TIME +# Copy during configure/generate and set configure depends. +# - BOTH +# Run configure-time copy and build-time copy. 
+# +# Rules: +# - exactly one input mode must be used +# - BUILD_TRIGGER_TARGETS is valid only in DESTINATION mode for BUILD_TIME/BOTH +# +function(nabla_sync_runtime_modules) + set(_nbl_runtime_modules_subdir "Libraries") + set(_nbl_mode BUILD_TIME) + + cmake_parse_arguments(_NBL_CUSTOM "" "MODE;DESTINATION;RUNTIME_MODULES_SUBDIR" "TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) + _nbl_runtime_modules_extract_destination_overrides(_destination_overrides _unknown_tokens ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}) + + if(_unknown_tokens) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_sync_runtime_modules: ${_unknown_tokens}") endif() + if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) + set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") + endif() + if(DEFINED _NBL_CUSTOM_MODE) + set(_nbl_mode "${_NBL_CUSTOM_MODE}") + endif() + + string(TOUPPER "${_nbl_mode}" _nbl_mode) + if(NOT _nbl_mode MATCHES "^(BUILD_TIME|CONFIGURE_TIME|BOTH)$") + message(FATAL_ERROR "Nabla: invalid MODE='${_nbl_mode}', expected BUILD_TIME, CONFIGURE_TIME or BOTH") + endif() + + set(_has_targets OFF) + if(_NBL_CUSTOM_TARGETS) + set(_has_targets ON) + endif() + + set(_has_destination OFF) + if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") + set(_has_destination ON) + endif() + if(_destination_overrides) + set(_has_destination ON) + endif() + + if(_has_targets AND _has_destination) + message(FATAL_ERROR "Nabla: use either TARGETS mode or DESTINATION mode, not both") + endif() + + if(NOT _has_targets AND NOT _has_destination) + message(FATAL_ERROR "Nabla: nabla_sync_runtime_modules requires TARGETS or DESTINATION/DESTINATION_") + endif() + + if(_has_targets) + if(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS is valid only in DESTINATION mode") + endif() + + if(_nbl_mode STREQUAL "CONFIGURE_TIME" OR _nbl_mode STREQUAL "BOTH") + set(_enable_configure_depends OFF) + if(_nbl_mode STREQUAL "CONFIGURE_TIME") + 
set(_enable_configure_depends ON) + endif() + _nbl_runtime_modules_add_configure_sync_rule_for_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}" "${_enable_configure_depends}") + endif() + + if(_nbl_mode STREQUAL "BUILD_TIME" OR _nbl_mode STREQUAL "BOTH") + _nbl_runtime_modules_add_build_sync_rule_for_targets("${_NBL_CUSTOM_TARGETS}" "${_nbl_runtime_modules_subdir}") + endif() + + return() + endif() + + _nbl_runtime_modules_expand_destination_pairs("${_NBL_CUSTOM_DESTINATION}" "${_destination_overrides}" _cfg_dst_pairs) + + if(_nbl_mode STREQUAL "CONFIGURE_TIME" OR _nbl_mode STREQUAL "BOTH") + set(_enable_configure_depends OFF) + if(_nbl_mode STREQUAL "CONFIGURE_TIME") + set(_enable_configure_depends ON) + endif() + _nbl_runtime_modules_add_configure_sync_rule_for_pairs("${_cfg_dst_pairs}" "${_enable_configure_depends}") + endif() + + if(_nbl_mode STREQUAL "BUILD_TIME" OR _nbl_mode STREQUAL "BOTH") + if(NOT _NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: DESTINATION mode with MODE ${_nbl_mode} requires BUILD_TRIGGER_TARGETS") + endif() + _nbl_runtime_modules_add_build_sync_rule_for_destination_pairs("${_NBL_CUSTOM_BUILD_TRIGGER_TARGETS}" "${_cfg_dst_pairs}") + elseif(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + message(FATAL_ERROR "Nabla: BUILD_TRIGGER_TARGETS is valid only for MODE BUILD_TIME or MODE BOTH") + endif() +endfunction() + +# +# nabla_setup_runtime_modules( +# [TARGETS ] +# [DESTINATION ] +# [DESTINATION_ ...] +# [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] +# [RUNTIME_MODULES_SUBDIR ] +# [INSTALL_RULES ON|OFF] +# [APPLY_LOOKUP_TO_TARGETS ] +# [BUILD_TRIGGER_TARGETS ] +# ) +# +# Convenience wrapper that composes: +# - nabla_sync_runtime_modules(...) +# - nabla_apply_runtime_lookup(...) +# - nabla_setup_runtime_install_modules(...) 
when INSTALL_RULES is enabled +# +# Lookup behavior: +# - if APPLY_LOOKUP_TO_TARGETS is set, lookup is applied to that list +# - else if TARGETS mode is used, lookup is applied to TARGETS +# - else no lookup changes are applied +# +function(nabla_setup_runtime_modules) set(_nbl_runtime_modules_subdir "Libraries") set(_nbl_install_rules OFF) - set(_nbl_options "") - set(_nbl_one_value_args RUNTIME_MODULES_SUBDIR INSTALL_RULES) - set(_nbl_multi_value_args "") - cmake_parse_arguments(_NBL_CUSTOM "${_nbl_options}" "${_nbl_one_value_args}" "${_nbl_multi_value_args}" ${ARGN}) + set(_nbl_mode BUILD_TIME) + + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR;INSTALL_RULES;MODE;DESTINATION" "TARGETS;APPLY_LOOKUP_TO_TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) + _nbl_runtime_modules_extract_destination_overrides(_destination_overrides _unknown_tokens ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}) + + if(_unknown_tokens) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_modules: ${_unknown_tokens}") + endif() if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) set(_nbl_runtime_modules_subdir "${_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR}") @@ -103,18 +675,51 @@ function(nabla_setup_runtime_modules _TARGET) if(DEFINED _NBL_CUSTOM_INSTALL_RULES) set(_nbl_install_rules "${_NBL_CUSTOM_INSTALL_RULES}") endif() + if(DEFINED _NBL_CUSTOM_MODE) + set(_nbl_mode "${_NBL_CUSTOM_MODE}") + endif() - _nbl_runtime_modules_apply_lookup_definitions("${_TARGET}" "${_nbl_runtime_modules_subdir}") - - add_custom_command(TARGET "${_TARGET}" POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory "$/${_nbl_runtime_modules_subdir}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "$/${_nbl_runtime_modules_subdir}/" - COMMAND ${CMAKE_COMMAND} -E copy_directory "$,3rdparty,dxc>" "$/${_nbl_runtime_modules_subdir}" - VERBATIM + set(_sync_args + MODE "${_nbl_mode}" + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" ) + if(_NBL_CUSTOM_TARGETS) + list(APPEND _sync_args TARGETS 
${_NBL_CUSTOM_TARGETS}) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") + list(APPEND _sync_args DESTINATION "${_NBL_CUSTOM_DESTINATION}") + endif() + foreach(_override IN LISTS _destination_overrides) + string(REPLACE "::" ";" _override_parts "${_override}") + list(GET _override_parts 0 _cfg_upper) + list(GET _override_parts 1 _cfg_destination) + list(APPEND _sync_args "DESTINATION_${_cfg_upper}" "${_cfg_destination}") + endforeach() + if(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) + list(APPEND _sync_args BUILD_TRIGGER_TARGETS ${_NBL_CUSTOM_BUILD_TRIGGER_TARGETS}) + endif() + + nabla_sync_runtime_modules(${_sync_args}) + + set(_lookup_targets "") + if(_NBL_CUSTOM_APPLY_LOOKUP_TO_TARGETS) + set(_lookup_targets ${_NBL_CUSTOM_APPLY_LOOKUP_TO_TARGETS}) + elseif(_NBL_CUSTOM_TARGETS) + set(_lookup_targets ${_NBL_CUSTOM_TARGETS}) + endif() + + if(_lookup_targets) + nabla_apply_runtime_lookup( + TARGETS ${_lookup_targets} + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" + ) + endif() + if(_nbl_install_rules) - _nbl_runtime_modules_add_install_rules("${_nbl_runtime_modules_subdir}") + nabla_setup_runtime_install_modules( + RUNTIME_MODULES_SUBDIR "${_nbl_runtime_modules_subdir}" + ) endif() endfunction() diff --git a/docs/consume/README.md b/docs/consume/README.md index 4b746c6618..53353fd6d6 100644 --- a/docs/consume/README.md +++ b/docs/consume/README.md @@ -7,19 +7,20 @@ This document describes how to consume an installed Nabla package from another C After `find_package(Nabla CONFIG REQUIRED)`, the package provides: - imported target `Nabla::Nabla` -- helper `nabla_setup_runtime_modules(...)` +- helper `nabla_sync_runtime_modules(...)` +- helper `nabla_apply_runtime_lookup(...)` - helper `nabla_setup_runtime_install_modules(...)` +- wrapper `nabla_setup_runtime_modules(...)` On shared builds, runtime modules include Nabla and DXC. -## 2. 
Locate the package +Implementation and argument docs: -You can point CMake to the package with: +- package API implementation: `${Nabla_ROOT}/cmake/NablaConfig.cmake` +- source template in Nabla repo: `cmake/NablaConfig.cmake.in` +- each public helper has usage notes in comments directly above its definition -- `-D Nabla_DIR=/cmake` -- `CMAKE_PREFIX_PATH=` - -Minimal baseline: +## 2. Minimal baseline ```cmake cmake_minimum_required(VERSION 3.30) @@ -31,57 +32,65 @@ add_executable(my_app main.cpp) target_link_libraries(my_app PRIVATE Nabla::Nabla) ``` -## 3. Flow NO_BUILD_COPY install to e.g. `./Libraries` - -Use this flow when: +Behavior in this minimal setup: -- build-time should load directly from package -- install tree should load from e.g. `./Libraries` +- executable loads Nabla/DXC directly from package-provided lookup paths +- this works in consumer build interface without extra copy helpers +- install layout is not configured by this baseline -Call install-only helper: +If you also need your own install layout, add install rules and relative lookup defines. +Helpers from sections below can do this for you. -```cmake -include(GNUInstallDirs) +## 3. 
Runtime setup primitives -add_executable(my_app main.cpp) -target_link_libraries(my_app PRIVATE Nabla::Nabla) +### 3.1 Copy runtime modules -nabla_setup_runtime_install_modules(my_app +```cmake +nabla_sync_runtime_modules( + TARGETS my_app + MODE BUILD_TIME RUNTIME_MODULES_SUBDIR "Libraries" ) +``` -install(TARGETS my_app - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" +or with explicit destination(s): + +```cmake +nabla_sync_runtime_modules( + DESTINATION_DEBUG "${CMAKE_BINARY_DIR}/Debug/Libraries" + DESTINATION_RELEASE "${CMAKE_BINARY_DIR}/Release/Libraries" + DESTINATION_RELWITHDEBINFO "${CMAKE_BINARY_DIR}/RelWithDebInfo/Libraries" + MODE CONFIGURE_TIME ) ``` -What it does: +Rules: -- adds runtime lookup defines `./Libraries` -- adds install rules for Nabla/DXC runtime modules to `${CMAKE_INSTALL_BINDIR}/Libraries` -- does not add post-build copy +- use either `TARGETS` mode or `DESTINATION` / `DESTINATION_` mode +- `MODE CONFIGURE_TIME` does copy during configure/generate +- `MODE BUILD_TIME` and `MODE BOTH` in destination mode require `BUILD_TRIGGER_TARGETS` -Runtime behavior: +### 3.2 Apply runtime lookup defines -- build tree falls back to package runtime if `./Libraries` does not exist and relative package lookup can be resolved -- install tree uses `./Libraries` once modules are installed there +```cmake +nabla_apply_runtime_lookup( + TARGETS my_app + RUNTIME_MODULES_SUBDIR "Libraries" +) +``` -## 4. Flow WITH_BUILD_COPY install to e.g. 
`./Libraries` +This sets: -Use one call when you want both: +- `NBL_CPACK_PACKAGE_NABLA_DLL_DIR="./Libraries"` +- `NBL_CPACK_PACKAGE_DXC_DLL_DIR="./Libraries"` -- build-time copy to runtime subdir -- install-time copy to runtime subdir +### 3.3 Install runtime modules ```cmake include(GNUInstallDirs) -add_executable(my_app main.cpp) -target_link_libraries(my_app PRIVATE Nabla::Nabla) - -nabla_setup_runtime_modules(my_app +nabla_setup_runtime_install_modules( RUNTIME_MODULES_SUBDIR "Libraries" - INSTALL_RULES ON ) install(TARGETS my_app @@ -89,9 +98,48 @@ install(TARGETS my_app ) ``` -## 5. Config mapping +## 4. Wrapper helper + +`nabla_setup_runtime_modules(...)` composes: + +- `nabla_sync_runtime_modules(...)` +- `nabla_apply_runtime_lookup(...)` +- optional `nabla_setup_runtime_install_modules(...)` + +Example: + +```cmake +nabla_setup_runtime_modules( + TARGETS my_app + MODE CONFIGURE_TIME + RUNTIME_MODULES_SUBDIR "Libraries" + INSTALL_RULES ON +) +``` + +## 5. Split flow global copy and per-exe lookup + +This is the split pattern used by consumers that want one global copy setup and per-exe lookup: + +```cmake +# one global copy setup +nabla_sync_runtime_modules( + DESTINATION_DEBUG "${CMAKE_BINARY_DIR}/3rdparty/shared/Debug/Libraries" + DESTINATION_RELEASE "${CMAKE_BINARY_DIR}/3rdparty/shared/Release/Libraries" + DESTINATION_RELWITHDEBINFO "${CMAKE_BINARY_DIR}/3rdparty/shared/RelWithDebInfo/Libraries" + MODE CONFIGURE_TIME +) + +# per executable target +nabla_apply_runtime_lookup( + TARGETS my_app + RUNTIME_MODULES_SUBDIR "Libraries" +) +``` + +## 6. Config mapping -Runtime source paths are resolved from `$`. +Runtime source paths are resolved from mapped imported config of `Nabla::Nabla`. Imported-config mapping applies automatically. This includes cross-config usage when one consumer config maps to a different imported config. 
@@ -99,44 +147,38 @@ If you override mapping: - do it in the same configure run - if using `CMAKE_MAP_IMPORTED_CONFIG_`, set it before `find_package(Nabla)` +- for `MODE CONFIGURE_TIME` and `MODE BOTH`, set mapping before helper call -## 6. Troubleshooting +## 7. Troubleshooting ### `Could not load dxcompiler module` or `Could not load Nabla API` Check: -- helper usage matches your intended flow mode -- `RUNTIME_MODULES_SUBDIR` matches actual runtime folder layout -- install tree actually contains runtime modules under expected subdir +- lookup defines are applied to executable target(s) +- lookup subdir matches actual runtime layout +- runtime modules exist in build/install runtime directory ### Build works but installed app fails -Most often install rules are missing. +Install rules are usually missing. Use either: -- `nabla_setup_runtime_install_modules(...)` for `NO_BUILD_COPY` -- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` for `WITH_BUILD_COPY` - -### Build tree cannot resolve package runtime in install-only mode - -This usually means your build tree and package runtime are on different roots or drives so a relative fallback cannot be formed. - -Use one of: +- `nabla_setup_runtime_install_modules(...)` +- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` -- `nabla_setup_runtime_modules(... INSTALL_RULES ON)` to copy runtime modules into build tree - -### Why modules are copied in build tree - -Only `nabla_setup_runtime_modules(... INSTALL_RULES ON)` performs build-time copy. - -If you want no build copy, use `nabla_setup_runtime_install_modules(...)` instead. - -## 7. Design guidance +## 8. Design guidance For relocatable consumers: - keep lookup relative to executable - never expose absolute paths in public compile definitions -- use one of the helper flows consistently per target +- keep copy setup and lookup setup explicit in CMake + +Note: + +Current Nabla build interface still compiles some runtime lookup data with absolute paths. 
+This is a known issue on Nabla side and will be refactored. +Do not propagate that pattern to package consumers. +Consumer-facing package helpers are designed to avoid exposing absolute paths in consumer compile definitions. diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index 8de97448d1..7369388483 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -32,21 +32,24 @@ target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) -set(NBL_SMOKE_FLOW "NO_BUILD_COPY" CACHE STRING "Smoke runtime flow: NO_BUILD_COPY or WITH_BUILD_COPY") -set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS NO_BUILD_COPY WITH_BUILD_COPY) +set(NBL_SMOKE_FLOW "CONFIGURE_ONLY" CACHE STRING "Smoke runtime flow: CONFIGURE_ONLY or BUILD_ONLY") +set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS CONFIGURE_ONLY BUILD_ONLY) string(TOUPPER "${NBL_SMOKE_FLOW}" NBL_SMOKE_FLOW) message(STATUS "Smoke runtime flow: ${NBL_SMOKE_FLOW}") option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" ON) -if(NBL_SMOKE_FLOW STREQUAL "NO_BUILD_COPY") - # No build-time copy, install-time runtime modules in ./Libraries. - nabla_setup_runtime_install_modules(smoke +if(NBL_SMOKE_FLOW STREQUAL "CONFIGURE_ONLY") + nabla_setup_runtime_modules( + TARGETS smoke RUNTIME_MODULES_SUBDIR "Libraries" + MODE CONFIGURE_TIME + INSTALL_RULES ON ) -elseif(NBL_SMOKE_FLOW STREQUAL "WITH_BUILD_COPY") - # Build-time copy + install-time runtime modules in ./Libraries. 
- nabla_setup_runtime_modules(smoke +elseif(NBL_SMOKE_FLOW STREQUAL "BUILD_ONLY") + nabla_setup_runtime_modules( + TARGETS smoke RUNTIME_MODULES_SUBDIR "Libraries" + MODE BUILD_TIME INSTALL_RULES ON ) else() diff --git a/smoke/RunSmokeFlow.cmake b/smoke/RunSmokeFlow.cmake index 64d8ea0e7d..f192e2a838 100644 --- a/smoke/RunSmokeFlow.cmake +++ b/smoke/RunSmokeFlow.cmake @@ -1,10 +1,10 @@ if(NOT DEFINED FLOW) - message(FATAL_ERROR "FLOW is required. Allowed values: NO_BUILD_COPY, WITH_BUILD_COPY") + message(FATAL_ERROR "FLOW is required. Allowed values: CONFIGURE_ONLY, BUILD_ONLY") endif() string(TOUPPER "${FLOW}" FLOW) -if(NOT FLOW MATCHES "^(NO_BUILD_COPY|WITH_BUILD_COPY)$") - message(FATAL_ERROR "Invalid FLOW='${FLOW}'. Allowed values: NO_BUILD_COPY, WITH_BUILD_COPY") +if(NOT FLOW MATCHES "^(CONFIGURE_ONLY|BUILD_ONLY)$") + message(FATAL_ERROR "Invalid FLOW='${FLOW}'. Allowed values: CONFIGURE_ONLY, BUILD_ONLY") endif() if(NOT DEFINED CONFIG) From 6c23014559f23c7e287f466af8eaa5259020f78b Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Feb 2026 01:02:01 +0100 Subject: [PATCH 32/54] Use cmake parse arguments for runtime module destinations --- cmake/NablaConfig.cmake.in | 123 ++++++++++++++++--------------------- docs/consume/README.md | 10 +-- 2 files changed, 57 insertions(+), 76 deletions(-) diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index 24d46834e5..b67293d5ab 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -19,7 +19,9 @@ check_required_components(Nabla) # nabla_sync_runtime_modules( # [TARGETS ] # [DESTINATION ] -# [DESTINATION_ ]... +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] # [MODE ] # [RUNTIME_MODULES_SUBDIR ] # [BUILD_TRIGGER_TARGETS ] @@ -37,7 +39,9 @@ check_required_components(Nabla) # nabla_setup_runtime_modules( # [TARGETS ] # [DESTINATION ] -# [DESTINATION_ ]... 
+# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] # [APPLY_LOOKUP_TO_TARGETS ] # [RUNTIME_MODULES_SUBDIR ] # [MODE ] @@ -106,64 +110,24 @@ function(_nbl_runtime_modules_collect_consumer_configs _OUT_CONFIGS) set(${_OUT_CONFIGS} ${_consumer_configs} PARENT_SCOPE) endfunction() -function(_nbl_runtime_modules_extract_destination_overrides _OUT_OVERRIDES _OUT_UNKNOWN) - set(_tokens ${ARGN}) - set(_overrides "") - set(_unknown "") - - list(LENGTH _tokens _tokens_len) - math(EXPR _tokens_mod2 "${_tokens_len} % 2") - if(_tokens_mod2) - set(${_OUT_OVERRIDES} "" PARENT_SCOPE) - set(${_OUT_UNKNOWN} "${_tokens}" PARENT_SCOPE) - return() - endif() - - while(TRUE) - list(LENGTH _tokens _tokens_len) - if(_tokens_len EQUAL 0) - break() - endif() - - list(POP_FRONT _tokens _key) - list(POP_FRONT _tokens _value) - string(TOUPPER "${_key}" _key_upper) - - if(_key_upper MATCHES "^DESTINATION_[A-Z0-9_]+$") - string(REGEX REPLACE "^DESTINATION_" "" _cfg_upper "${_key_upper}") - list(APPEND _overrides "${_cfg_upper}::${_value}") - else() - list(APPEND _unknown "${_key}" "${_value}") - endif() - endwhile() - - set(${_OUT_OVERRIDES} ${_overrides} PARENT_SCOPE) - set(${_OUT_UNKNOWN} ${_unknown} PARENT_SCOPE) -endfunction() - -function(_nbl_runtime_modules_expand_destination_pairs _DESTINATION_DEFAULT _DESTINATION_OVERRIDES _OUT_CFG_DST_PAIRS) +function(_nbl_runtime_modules_expand_destination_pairs _DESTINATION_DEFAULT _DESTINATION_DEBUG _DESTINATION_RELEASE _DESTINATION_RELWITHDEBINFO _OUT_CFG_DST_PAIRS) _nbl_runtime_modules_collect_consumer_configs(_consumer_configs) set(_cfg_dst_pairs "") foreach(_consumer_config IN LISTS _consumer_configs) string(TOUPPER "${_consumer_config}" _cfg_upper) - set(_resolved_destination "") - - foreach(_override IN LISTS _DESTINATION_OVERRIDES) - string(REPLACE "::" ";" _override_parts "${_override}") - list(GET _override_parts 0 _override_cfg_upper) - if(_override_cfg_upper STREQUAL _cfg_upper) - list(GET _override_parts 1 
_resolved_destination) - break() - endif() - endforeach() - - if(_resolved_destination STREQUAL "") + if(_cfg_upper STREQUAL "DEBUG" AND NOT _DESTINATION_DEBUG STREQUAL "") + set(_resolved_destination "${_DESTINATION_DEBUG}") + elseif(_cfg_upper STREQUAL "RELEASE" AND NOT _DESTINATION_RELEASE STREQUAL "") + set(_resolved_destination "${_DESTINATION_RELEASE}") + elseif(_cfg_upper STREQUAL "RELWITHDEBINFO" AND NOT _DESTINATION_RELWITHDEBINFO STREQUAL "") + set(_resolved_destination "${_DESTINATION_RELWITHDEBINFO}") + else() set(_resolved_destination "${_DESTINATION_DEFAULT}") endif() if(_resolved_destination STREQUAL "") - message(FATAL_ERROR "Nabla: missing destination for consumer config \"${_consumer_config}\". Provide DESTINATION or DESTINATION_${_cfg_upper}.") + message(FATAL_ERROR "Nabla: missing destination for consumer config \"${_consumer_config}\". Provide DESTINATION or one of DESTINATION_DEBUG/DESTINATION_RELEASE/DESTINATION_RELWITHDEBINFO.") endif() if(_resolved_destination MATCHES "\\$<") @@ -525,7 +489,9 @@ endfunction() # nabla_sync_runtime_modules( # [TARGETS ] # [DESTINATION ] -# [DESTINATION_ ...] +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] # [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] # [RUNTIME_MODULES_SUBDIR ] # [BUILD_TRIGGER_TARGETS ] @@ -537,7 +503,7 @@ endfunction() # - TARGETS mode # Copies beside each target runtime dir under RUNTIME_MODULES_SUBDIR. # - DESTINATION mode -# Copies to explicit DESTINATION or DESTINATION_ paths. +# Copies to explicit DESTINATION or DESTINATION_DEBUG/RELEASE/RELWITHDEBINFO paths. 
# # MODE: # - BUILD_TIME @@ -555,11 +521,10 @@ function(nabla_sync_runtime_modules) set(_nbl_runtime_modules_subdir "Libraries") set(_nbl_mode BUILD_TIME) - cmake_parse_arguments(_NBL_CUSTOM "" "MODE;DESTINATION;RUNTIME_MODULES_SUBDIR" "TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) - _nbl_runtime_modules_extract_destination_overrides(_destination_overrides _unknown_tokens ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}) + cmake_parse_arguments(_NBL_CUSTOM "" "MODE;DESTINATION;DESTINATION_DEBUG;DESTINATION_RELEASE;DESTINATION_RELWITHDEBINFO;RUNTIME_MODULES_SUBDIR" "TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) - if(_unknown_tokens) - message(FATAL_ERROR "Nabla: unexpected arguments for nabla_sync_runtime_modules: ${_unknown_tokens}") + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_sync_runtime_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") endif() if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) @@ -583,7 +548,13 @@ function(nabla_sync_runtime_modules) if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") set(_has_destination ON) endif() - if(_destination_overrides) + if(DEFINED _NBL_CUSTOM_DESTINATION_DEBUG AND NOT _NBL_CUSTOM_DESTINATION_DEBUG STREQUAL "") + set(_has_destination ON) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELEASE AND NOT _NBL_CUSTOM_DESTINATION_RELEASE STREQUAL "") + set(_has_destination ON) + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO AND NOT _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO STREQUAL "") set(_has_destination ON) endif() @@ -592,7 +563,7 @@ function(nabla_sync_runtime_modules) endif() if(NOT _has_targets AND NOT _has_destination) - message(FATAL_ERROR "Nabla: nabla_sync_runtime_modules requires TARGETS or DESTINATION/DESTINATION_") + message(FATAL_ERROR "Nabla: nabla_sync_runtime_modules requires TARGETS or DESTINATION/DESTINATION_DEBUG/DESTINATION_RELEASE/DESTINATION_RELWITHDEBINFO") endif() if(_has_targets) @@ -615,7 +586,13 @@ function(nabla_sync_runtime_modules) return() 
endif() - _nbl_runtime_modules_expand_destination_pairs("${_NBL_CUSTOM_DESTINATION}" "${_destination_overrides}" _cfg_dst_pairs) + _nbl_runtime_modules_expand_destination_pairs( + "${_NBL_CUSTOM_DESTINATION}" + "${_NBL_CUSTOM_DESTINATION_DEBUG}" + "${_NBL_CUSTOM_DESTINATION_RELEASE}" + "${_NBL_CUSTOM_DESTINATION_RELWITHDEBINFO}" + _cfg_dst_pairs + ) if(_nbl_mode STREQUAL "CONFIGURE_TIME" OR _nbl_mode STREQUAL "BOTH") set(_enable_configure_depends OFF) @@ -639,7 +616,9 @@ endfunction() # nabla_setup_runtime_modules( # [TARGETS ] # [DESTINATION ] -# [DESTINATION_ ...] +# [DESTINATION_DEBUG ] +# [DESTINATION_RELEASE ] +# [DESTINATION_RELWITHDEBINFO ] # [MODE BUILD_TIME|CONFIGURE_TIME|BOTH] # [RUNTIME_MODULES_SUBDIR ] # [INSTALL_RULES ON|OFF] @@ -662,11 +641,10 @@ function(nabla_setup_runtime_modules) set(_nbl_install_rules OFF) set(_nbl_mode BUILD_TIME) - cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR;INSTALL_RULES;MODE;DESTINATION" "TARGETS;APPLY_LOOKUP_TO_TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) - _nbl_runtime_modules_extract_destination_overrides(_destination_overrides _unknown_tokens ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}) + cmake_parse_arguments(_NBL_CUSTOM "" "RUNTIME_MODULES_SUBDIR;INSTALL_RULES;MODE;DESTINATION;DESTINATION_DEBUG;DESTINATION_RELEASE;DESTINATION_RELWITHDEBINFO" "TARGETS;APPLY_LOOKUP_TO_TARGETS;BUILD_TRIGGER_TARGETS" ${ARGV}) - if(_unknown_tokens) - message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_modules: ${_unknown_tokens}") + if(_NBL_CUSTOM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Nabla: unexpected arguments for nabla_setup_runtime_modules: ${_NBL_CUSTOM_UNPARSED_ARGUMENTS}") endif() if(_NBL_CUSTOM_RUNTIME_MODULES_SUBDIR) @@ -690,12 +668,15 @@ function(nabla_setup_runtime_modules) if(DEFINED _NBL_CUSTOM_DESTINATION AND NOT _NBL_CUSTOM_DESTINATION STREQUAL "") list(APPEND _sync_args DESTINATION "${_NBL_CUSTOM_DESTINATION}") endif() - foreach(_override IN LISTS _destination_overrides) - string(REPLACE "::" ";" 
_override_parts "${_override}") - list(GET _override_parts 0 _cfg_upper) - list(GET _override_parts 1 _cfg_destination) - list(APPEND _sync_args "DESTINATION_${_cfg_upper}" "${_cfg_destination}") - endforeach() + if(DEFINED _NBL_CUSTOM_DESTINATION_DEBUG AND NOT _NBL_CUSTOM_DESTINATION_DEBUG STREQUAL "") + list(APPEND _sync_args DESTINATION_DEBUG "${_NBL_CUSTOM_DESTINATION_DEBUG}") + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELEASE AND NOT _NBL_CUSTOM_DESTINATION_RELEASE STREQUAL "") + list(APPEND _sync_args DESTINATION_RELEASE "${_NBL_CUSTOM_DESTINATION_RELEASE}") + endif() + if(DEFINED _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO AND NOT _NBL_CUSTOM_DESTINATION_RELWITHDEBINFO STREQUAL "") + list(APPEND _sync_args DESTINATION_RELWITHDEBINFO "${_NBL_CUSTOM_DESTINATION_RELWITHDEBINFO}") + endif() if(_NBL_CUSTOM_BUILD_TRIGGER_TARGETS) list(APPEND _sync_args BUILD_TRIGGER_TARGETS ${_NBL_CUSTOM_BUILD_TRIGGER_TARGETS}) endif() diff --git a/docs/consume/README.md b/docs/consume/README.md index 53353fd6d6..f11aa10bbe 100644 --- a/docs/consume/README.md +++ b/docs/consume/README.md @@ -66,7 +66,7 @@ nabla_sync_runtime_modules( Rules: -- use either `TARGETS` mode or `DESTINATION` / `DESTINATION_` mode +- use either `TARGETS` mode or `DESTINATION` / `DESTINATION_DEBUG` / `DESTINATION_RELEASE` / `DESTINATION_RELWITHDEBINFO` mode - `MODE CONFIGURE_TIME` does copy during configure/generate - `MODE BUILD_TIME` and `MODE BOTH` in destination mode require `BUILD_TRIGGER_TARGETS` @@ -178,7 +178,7 @@ For relocatable consumers: Note: -Current Nabla build interface still compiles some runtime lookup data with absolute paths. -This is a known issue on Nabla side and will be refactored. -Do not propagate that pattern to package consumers. -Consumer-facing package helpers are designed to avoid exposing absolute paths in consumer compile definitions. 
+- current Nabla build interface still compiles some runtime lookup data with absolute paths +- this is a known issue on Nabla side and will be refactored +- do not propagate that pattern to package consumers +- consumer-facing package helpers are designed to avoid exposing absolute paths in consumer compile definitions From 833009ba7dceb86f4c6723cdaa43734fba24fe8e Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Feb 2026 01:43:06 +0100 Subject: [PATCH 33/54] Add minimalistic smoke flow and consumption reference --- docs/consume/README.md | 15 +++++++++++++ smoke/CMakeLists.txt | 8 ++++--- smoke/RunSmokeFlow.cmake | 47 +++++++++++++++++++++++----------------- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/docs/consume/README.md b/docs/consume/README.md index f11aa10bbe..e73a69c310 100644 --- a/docs/consume/README.md +++ b/docs/consume/README.md @@ -182,3 +182,18 @@ Note: - this is a known issue on Nabla side and will be refactored - do not propagate that pattern to package consumers - consumer-facing package helpers are designed to avoid exposing absolute paths in consumer compile definitions + +## 9. Smoke reference + +`smoke/` is a reference consumer for Nabla package consumption. + +It contains multiple usage flows: + +- `MINIMALISTIC` link-only consumption without helper calls +- `CONFIGURE_ONLY` helper-based configure-time runtime sync +- `BUILD_ONLY` helper-based build-time runtime sync + +Flow selection is done with `NBL_SMOKE_FLOW` in `smoke/CMakeLists.txt` and `FLOW` in `smoke/RunSmokeFlow.cmake`. + +Smoke is also used as CI coverage for package consumption flows. +The `smoke-tests` job in `.github/workflows/build-nabla.yml` runs those flows as end-to-end checks. 
diff --git a/smoke/CMakeLists.txt b/smoke/CMakeLists.txt index 7369388483..99c76a302c 100644 --- a/smoke/CMakeLists.txt +++ b/smoke/CMakeLists.txt @@ -32,13 +32,15 @@ target_link_libraries(smoke PRIVATE Nabla::Nabla) target_compile_definitions(smoke PRIVATE _AFXDLL) target_precompile_headers(smoke PRIVATE pch.hpp) -set(NBL_SMOKE_FLOW "CONFIGURE_ONLY" CACHE STRING "Smoke runtime flow: CONFIGURE_ONLY or BUILD_ONLY") -set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS CONFIGURE_ONLY BUILD_ONLY) +set(NBL_SMOKE_FLOW "CONFIGURE_ONLY" CACHE STRING "Smoke runtime flow: MINIMALISTIC, CONFIGURE_ONLY or BUILD_ONLY") +set_property(CACHE NBL_SMOKE_FLOW PROPERTY STRINGS MINIMALISTIC CONFIGURE_ONLY BUILD_ONLY) string(TOUPPER "${NBL_SMOKE_FLOW}" NBL_SMOKE_FLOW) message(STATUS "Smoke runtime flow: ${NBL_SMOKE_FLOW}") option(NBL_SMOKE_INSTALL_SELFTEST "Install smoke with CTest metadata and run tests from install tree" ON) -if(NBL_SMOKE_FLOW STREQUAL "CONFIGURE_ONLY") +if(NBL_SMOKE_FLOW STREQUAL "MINIMALISTIC") + message(STATUS "Smoke minimalistic flow uses only package default runtime lookup") +elseif(NBL_SMOKE_FLOW STREQUAL "CONFIGURE_ONLY") nabla_setup_runtime_modules( TARGETS smoke RUNTIME_MODULES_SUBDIR "Libraries" diff --git a/smoke/RunSmokeFlow.cmake b/smoke/RunSmokeFlow.cmake index f192e2a838..9350e6094e 100644 --- a/smoke/RunSmokeFlow.cmake +++ b/smoke/RunSmokeFlow.cmake @@ -1,10 +1,10 @@ if(NOT DEFINED FLOW) - message(FATAL_ERROR "FLOW is required. Allowed values: CONFIGURE_ONLY, BUILD_ONLY") + message(FATAL_ERROR "FLOW is required. Allowed values: MINIMALISTIC, CONFIGURE_ONLY, BUILD_ONLY") endif() string(TOUPPER "${FLOW}" FLOW) -if(NOT FLOW MATCHES "^(CONFIGURE_ONLY|BUILD_ONLY)$") - message(FATAL_ERROR "Invalid FLOW='${FLOW}'. Allowed values: CONFIGURE_ONLY, BUILD_ONLY") +if(NOT FLOW MATCHES "^(MINIMALISTIC|CONFIGURE_ONLY|BUILD_ONLY)$") + message(FATAL_ERROR "Invalid FLOW='${FLOW}'. 
Allowed values: MINIMALISTIC, CONFIGURE_ONLY, BUILD_ONLY") endif() if(NOT DEFINED CONFIG) @@ -44,12 +44,17 @@ endfunction() file(REMOVE_RECURSE "${BUILD_DIR}") +set(_run_install_selftest ON) +if(FLOW STREQUAL "MINIMALISTIC") + set(_run_install_selftest OFF) +endif() + run_cmd( "${CMAKE_COMMAND}" -S "${SMOKE_SOURCE_DIR}" -B "${BUILD_DIR}" -D "NBL_SMOKE_FLOW=${FLOW}" - -D "NBL_SMOKE_INSTALL_SELFTEST=ON" + -D "NBL_SMOKE_INSTALL_SELFTEST=${_run_install_selftest}" ) run_cmd( @@ -68,21 +73,23 @@ run_cmd( -C "${CONFIG}" ) -file(REMOVE_RECURSE "${INSTALL_DIR}") +if(_run_install_selftest) + file(REMOVE_RECURSE "${INSTALL_DIR}") -run_cmd( - "${CMAKE_COMMAND}" - --install "${BUILD_DIR}" - --config "${CONFIG}" - --prefix "${INSTALL_DIR}" -) + run_cmd( + "${CMAKE_COMMAND}" + --install "${BUILD_DIR}" + --config "${CONFIG}" + --prefix "${INSTALL_DIR}" + ) -run_cmd( - "${CTEST_BIN}" - --verbose - --test-dir "${INSTALL_DIR}" - --force-new-ctest-process - --output-on-failure - --no-tests=error - -C "${CONFIG}" -) + run_cmd( + "${CTEST_BIN}" + --verbose + --test-dir "${INSTALL_DIR}" + --force-new-ctest-process + --output-on-failure + --no-tests=error + -C "${CONFIG}" + ) +endif() From 4742eabf207a5fb2ff26d0317acd793e638491bd Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Feb 2026 02:19:03 +0100 Subject: [PATCH 34/54] Add minimalistic smoke flow to CI --- .github/workflows/build-nabla.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index fea595428f..e14586510b 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -385,6 +385,9 @@ jobs: if (-not (Test-Path "smoke/build-ct/install")) { throw "smoke/build-ct/install not found" } tree.com smoke /F + - name: Smoke Flow MINIMALISTIC + run: cmake -D FLOW=MINIMALISTIC -D CONFIG=${{ matrix.config }} -P smoke/RunSmokeFlow.cmake + - name: Smoke Flow CONFIGURE_ONLY run: cmake -D FLOW=CONFIGURE_ONLY -D CONFIG=${{ 
matrix.config }} -P smoke/RunSmokeFlow.cmake From 1b6611b4cb29d83a9e1e5eea73201060098ed7fc Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Feb 2026 11:44:08 +0100 Subject: [PATCH 35/54] Fix runtime sync trigger and Docker setup retries --- .github/workflows/build-nabla.yml | 32 ++++++++++++++++++++++++++++--- .github/workflows/run-nsc.yml | 32 ++++++++++++++++++++++++++++--- cmake/NablaConfig.cmake.in | 9 +++++++-- 3 files changed, 65 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-nabla.yml b/.github/workflows/build-nabla.yml index e14586510b..44c9808ff0 100644 --- a/.github/workflows/build-nabla.yml +++ b/.github/workflows/build-nabla.yml @@ -50,9 +50,35 @@ jobs: Set-MpPreference -DisableArchiveScanning $true Set-MpPreference -DisableScanningMappedNetworkDrivesForFullScan $true - if (-not (docker network ls --format '{{.Name}}' | Where-Object { $_ -eq 'docker_default' })) { - docker network create --driver nat docker_default - if ($LASTEXITCODE -ne 0) { exit 1 } + $maxAttempts = 12 + $delaySeconds = 5 + $dockerReady = $false + + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) { + $networkNames = docker network ls --format '{{.Name}}' + if ($LASTEXITCODE -eq 0) { + if (-not ($networkNames | Where-Object { $_ -eq 'docker_default' })) { + docker network create --driver nat docker_default + if ($LASTEXITCODE -eq 0) { + $dockerReady = $true + break + } + } + else { + $dockerReady = $true + break + } + } + + if ($attempt -lt $maxAttempts) { + Write-Host "Docker not ready yet (attempt $attempt/$maxAttempts), retry in ${delaySeconds}s..." 
+ Start-Sleep -Seconds $delaySeconds + } + } + + if (-not $dockerReady) { + Write-Error "Docker was not ready after $($maxAttempts*$delaySeconds)s total wait" + exit 1 } - name: Set prefix diff --git a/.github/workflows/run-nsc.yml b/.github/workflows/run-nsc.yml index d5f9f74c2b..ce050581dc 100644 --- a/.github/workflows/run-nsc.yml +++ b/.github/workflows/run-nsc.yml @@ -51,9 +51,35 @@ jobs: Set-MpPreference -DisableArchiveScanning $true Set-MpPreference -DisableScanningMappedNetworkDrivesForFullScan $true - if (-not (docker network ls --format '{{.Name}}' | Where-Object { $_ -eq 'docker_default' })) { - docker network create --driver nat docker_default - if ($LASTEXITCODE -ne 0) { exit 1 } + $maxAttempts = 12 + $delaySeconds = 5 + $dockerReady = $false + + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) { + $networkNames = docker network ls --format '{{.Name}}' + if ($LASTEXITCODE -eq 0) { + if (-not ($networkNames | Where-Object { $_ -eq 'docker_default' })) { + docker network create --driver nat docker_default + if ($LASTEXITCODE -eq 0) { + $dockerReady = $true + break + } + } + else { + $dockerReady = $true + break + } + } + + if ($attempt -lt $maxAttempts) { + Write-Host "Docker not ready yet (attempt $attempt/$maxAttempts), retry in ${delaySeconds}s..." 
+ Start-Sleep -Seconds $delaySeconds + } + } + + if (-not $dockerReady) { + Write-Error "Docker was not ready after $($maxAttempts*$delaySeconds)s total wait" + exit 1 } $sendDiscord = "${{ inputs.withDiscordMSG }}" -eq "true" diff --git a/cmake/NablaConfig.cmake.in b/cmake/NablaConfig.cmake.in index b67293d5ab..c91f6dfaec 100644 --- a/cmake/NablaConfig.cmake.in +++ b/cmake/NablaConfig.cmake.in @@ -286,13 +286,18 @@ function(_nbl_runtime_modules_add_configure_sync_rule_for_pairs _CFG_DST_PAIRS _ cmake_path(GET _nabla_runtime_file FILENAME _nabla_runtime_name) cmake_path(GET _dxc_runtime_file FILENAME _dxc_runtime_name) - file(COPY_FILE "${_nabla_runtime_file}" "${_runtime_modules_dst}/${_nabla_runtime_name}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) - file(COPY_FILE "${_dxc_runtime_file}" "${_runtime_modules_dst}/${_dxc_runtime_name}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + set(_nabla_runtime_dst "${_runtime_modules_dst}/${_nabla_runtime_name}") + set(_dxc_runtime_dst "${_runtime_modules_dst}/${_dxc_runtime_name}") + + file(COPY_FILE "${_nabla_runtime_file}" "${_nabla_runtime_dst}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) + file(COPY_FILE "${_dxc_runtime_file}" "${_dxc_runtime_dst}" ONLY_IF_DIFFERENT INPUT_MAY_BE_RECENT) if(_ENABLE_CONFIGURE_DEPENDS) set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${_nabla_runtime_file}" "${_dxc_runtime_file}" + "${_nabla_runtime_dst}" + "${_dxc_runtime_dst}" ) endif() From 6e4285482fd3fc313cb81a4a4cf7c2c1718057d3 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 27 Feb 2026 08:04:00 +0100 Subject: [PATCH 36/54] fix 4 year old Junior code --- include/nbl/system/ILogger.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/include/nbl/system/ILogger.h b/include/nbl/system/ILogger.h index db013ebeb4..72c271e72e 100644 --- a/include/nbl/system/ILogger.h +++ b/include/nbl/system/ILogger.h @@ -61,20 +61,15 @@ class ILogger : public core::IReferenceCounted using namespace 
std::chrono; auto currentTime = std::chrono::system_clock::now(); const std::time_t t = std::chrono::system_clock::to_time_t(currentTime); - - // Since there is no real way in c++ to get current time with microseconds, this is my weird approach - auto time_since_epoch = duration_cast(system_clock::now().time_since_epoch()); - auto time_since_epoch_s = duration_cast(system_clock::now().time_since_epoch()); - time_since_epoch -= duration_cast(time_since_epoch_s); - - // This while is for the microseconds which are less that 6 digits long to be aligned with the others - while (time_since_epoch.count() / 100000 == 0) time_since_epoch *= 10; auto time = std::localtime(&t); + // since there's no microseconds in `time` + const auto us_fraction = duration_cast(currentTime.time_since_epoch()) - duration_cast(duration_cast(currentTime.time_since_epoch())); + constexpr size_t DATE_STR_LENGTH = 28; std::string timeStr(DATE_STR_LENGTH, '\0'); - sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%d]", time->tm_mday, time->tm_mon + 1, 1900 + time->tm_year, time->tm_hour, time->tm_min, time->tm_sec, (int)time_since_epoch.count()); + sprintf(timeStr.data(), "[%02d.%02d.%d %02d:%02d:%02d:%06d]", time->tm_mday, time->tm_mon + 1, 1900 + time->tm_year, time->tm_hour, time->tm_min, time->tm_sec, (int)us_fraction.count()); std::string messageTypeStr; switch (logLevel) From 82e3ae1186f94232963a80c0cf4d02b14b103901 Mon Sep 17 00:00:00 2001 From: devshgraphicsprogramming Date: Fri, 27 Feb 2026 09:29:00 +0100 Subject: [PATCH 37/54] handle when we have more than 128kb of command data to track --- include/nbl/video/IGPUAccelerationStructure.h | 36 +-- include/nbl/video/IGPUCommandBuffer.h | 18 +- include/nbl/video/IGPUCommandPool.h | 299 ++++++++++++++---- src/nbl/video/IGPUCommandBuffer.cpp | 31 +- src/nbl/video/IQueue.cpp | 7 +- 5 files changed, 283 insertions(+), 108 deletions(-) diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h 
index 3c10a255a2..7da33e2cfa 100644 --- a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -289,8 +289,9 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat totalPrims += buildRangeInfo.primitiveCount; return true; } - - inline core::smart_refctd_ptr* fillTracking(core::smart_refctd_ptr* oit) const + + template // TODO: requires + inline ForwardIterator fillTracking(ForwardIterator oit) const { *(oit++) = core::smart_refctd_ptr(Base::scratch.buffer); if (Base::isUpdate) @@ -486,7 +487,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr return retval; } - inline core::smart_refctd_ptr* fillTracking(core::smart_refctd_ptr* oit) const + template // TODO: requires + inline ForwardIterator fillTracking(ForwardIterator oit) const { *(oit++) = core::smart_refctd_ptr(Base::scratch.buffer); if (Base::isUpdate) @@ -713,8 +715,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr *(tracked++) = *(it++); } // Useful if TLAS got built externally as well - template - inline void insertTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer) + template + inline void insertTrackedBLASes(ForwardIterator begin, const uint32_t count, const build_ver_t buildVer) { if (buildVer==0) return; @@ -725,14 +727,19 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>buildVer; prev=it++) {} auto inserted = m_pendingBuilds.emplace_after(prev); // now fill the contents - inserted->BLASes.insert(begin,end); + inserted->BLASes.reserve(count); + for (auto i=0u; iBLASes.insert(*begin); + ++begin; + } inserted->ordinal = buildVer; } - template - inline build_ver_t pushTrackedBLASes(const Iterator begin, const Iterator end) + template + inline build_ver_t pushTrackedBLASes(const ForwardIterator begin, const uint32_t count) { 
const auto buildVer = registerNextBuildVer(); - insertTrackedBLASes(begin,end,buildVer); + insertTrackedBLASes(begin,count,buildVer); return buildVer; } // a little utility to make sure nothing from before this build version gets tracked @@ -750,18 +757,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr const uint32_t m_maxInstanceCount; private: - struct DynamicUpCastingSpanIterator - { - inline bool operator!=(const DynamicUpCastingSpanIterator& other) const {return ptr!=other.ptr;} - - inline DynamicUpCastingSpanIterator operator++() {return {ptr++};} - - inline const IGPUBottomLevelAccelerationStructure* operator*() const {return dynamic_cast(ptr->get());} - - std::span>::iterator ptr; - }; friend class ILogicalDevice; friend class IQueue; + inline const core::unordered_set* getPendingBuildTrackedBLASes(const build_ver_t buildVer) const { const auto found = std::find_if(m_pendingBuilds.begin(),m_pendingBuilds.end(),[buildVer](const auto& item)->bool{return item.ordinal==buildVer;}); diff --git a/include/nbl/video/IGPUCommandBuffer.h b/include/nbl/video/IGPUCommandBuffer.h index 6b3bfef18c..8f0f1fce30 100644 --- a/include/nbl/video/IGPUCommandBuffer.h +++ b/include/nbl/video/IGPUCommandBuffer.h @@ -552,9 +552,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject { auto oit = reserveReferences(std::distance(begin,end)); if (oit) - while (begin!=end) - *(oit++) = core::smart_refctd_ptr(*(begin++)); - return oit; + { + while (begin!=end) + *(oit++) = core::smart_refctd_ptr(*(begin++)); + return true; + } + return false; } inline bool recordReferences(const std::span refs) {return recordReferences(refs.begin(),refs.end());} @@ -569,8 +572,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit,size},.dst=tlas}); while (beginBLASes!=endBLASes) *(oit++) = core::smart_refctd_ptr(*(beginBLASes++)); + return true; } - return oit; + return false; } virtual bool 
insertDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0; @@ -885,7 +889,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject template requires nbl::is_any_of_v bool invalidDrawIndirectCount(const asset::SBufferBinding& indirectBinding, const asset::SBufferBinding& countBinding, const uint32_t maxDrawCount, const uint32_t stride); - core::smart_refctd_ptr* reserveReferences(const uint32_t size); + IGPUCommandPool::CTrackedIterator reserveReferences(const uint32_t size); // This bound descriptor set record doesn't include the descriptor sets whose layout has _any_ one of its bindings // created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT @@ -896,7 +900,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject // The Command Pool already tracks resources referenced in the Build Infos or Copies From Memory (Deserializations), so we only need pointers into those records. struct TLASTrackingWrite { - std::span> src; + // TODO: pack a little more efficiently so we can recover `CTrackedIterator` more easily + IGPUCommandPool::CTrackedIterator srcBegin; + uint32_t count; IGPUTopLevelAccelerationStructure* dst; }; struct TLASTrackingCopy diff --git a/include/nbl/video/IGPUCommandPool.h b/include/nbl/video/IGPUCommandPool.h index ddc4fcfd5c..56a0c46008 100644 --- a/include/nbl/video/IGPUCommandPool.h +++ b/include/nbl/video/IGPUCommandPool.h @@ -80,6 +80,7 @@ class IGPUCommandPool : public IBackendObject virtual const void* getNativeHandle() const = 0; // Host access to Command Pools needs to be externally synchronized anyway so its completely fine to do this + // TODO: abstract it away, useful in other externally synchronised contexts template class StackAllocation final { @@ -157,6 +158,88 @@ class IGPUCommandPool : public IBackendObject class CTraceRaysIndirectCmd; class CBindRayTracingPipelineCmd; + class IVariableSizeCommandBase; + class 
CExtraResourceTrackingBlock; + class CTrackedIterator final + { + public: + using value_t = core::smart_refctd_ptr; + + private: + IVariableSizeCommandBase* m_cmd = nullptr; + value_t* m_res = nullptr; + + public: + inline CTrackedIterator() {} + inline CTrackedIterator(IVariableSizeCommandBase* beginCmd) : m_cmd(beginCmd), m_res(m_cmd ? m_cmd->getLocalResources():nullptr) {} + + explicit inline operator bool() const {return m_cmd && m_res && m_resgetLocalResources()+m_cmd->getLocalResourceCount();} + inline bool operator!=(const CTrackedIterator& other) const + { + const bool selfInvalid = bool(*this); + if (selfInvalid!=bool(other)) + return true; + if (selfInvalid) + return false; + return m_cmd!=other.m_cmd || m_res!=other.m_res; + } + + inline value_t& operator*() + { + assert(bool(*this)); + return *m_res; + } + inline const value_t& operator*() const + { + assert(bool(*this)); + return *m_res; + } + + inline CTrackedIterator operator+(uint32_t advance) const + { + CTrackedIterator retval = *this; + if (bool(*this)) + { + auto* const localRes = m_cmd->getLocalResources(); + assert(m_res>=localRes); + uint32_t localPos = m_res-localRes; + if (const auto localCount=m_cmd->getLocalResourceCount(); localPos+advancem_next; + if (!retval.m_cmd) + { + retval.m_res = nullptr; + break; + } + retval.m_res = retval.m_cmd->getLocalResources(); + if (advance&& dev, const core::bitflag _flags, const uint8_t _familyIx) : IBackendObject(std::move(dev)), m_scratchAlloc(nullptr,0u,0u,_NBL_SIMD_ALIGNMENT,SCRATCH_MEMORY_SIZE), m_flags(_flags), m_familyIx(_familyIx) {} @@ -168,9 +251,24 @@ class IGPUCommandPool : public IBackendObject // for access to what? 
friend class IGPUCommandBuffer; + struct DynamicBLASCastingIterator + { + inline bool operator!=(const DynamicBLASCastingIterator& other) const { return orig != other.orig; } + inline DynamicBLASCastingIterator operator++() {return {++orig};} + + inline core::smart_refctd_ptr operator*() const + { + return core::smart_refctd_ptr_dynamic_cast(*orig); + } + + IGPUCommandPool::CTrackedIterator orig; + }; + friend class ILogicalDevice; + friend class IQueue; + class CCommandSegment; - class alignas(COMMAND_ALIGNMENT) ICommand + class ICommand { friend class CCommandSegment; @@ -203,55 +301,90 @@ class IGPUCommandPool : public IBackendObject void operator delete( ICommand* ptr, std::destroying_delete_t, std::size_t sz, std::align_val_t al ) { ptr->~ICommand(); } - private: - - friend CCommandSegment; - const uint32_t m_size; + // 4 bytes unused }; - template - class NBL_FORCE_EBO IFixedSizeCommand : public ICommand + class IFixedSizeCommand : public ICommand { public: template - static uint32_t calc_size(const Args&...) + static inline uint32_t calc_size(const Args&...) { static_assert(std::is_final_v); + //static_assert(sizeof(CRTP)<=CCommandSegment::STORAGE_SIZE); return sizeof(CRTP); } - virtual ~IFixedSizeCommand() = default; + inline virtual ~IFixedSizeCommand() = default; protected: inline IFixedSizeCommand() : ICommand(calc_size()) {} }; - template - class NBL_FORCE_EBO IVariableSizeCommand : public ICommand + // I can't refactor this into a base class for tracking handles, cause I want them to live at the end :( + class CCommandSegmentListPool; + class IVariableSizeCommandBase : public ICommand { public: - template - static uint32_t calc_size(const Args&... 
args) + inline virtual ~IVariableSizeCommandBase() { - static_assert(std::is_final_v); - return core::alignUp(sizeof(CRTP)+CRTP::calc_resources(args...)*sizeof(core::smart_refctd_ptr),alignof(CRTP)); + std::destroy_n(getLocalResources(),getLocalResourceCount()); + } + + inline uint32_t getLocalResourceCount() const {return reinterpret_cast(static_cast(this)+1)[-1];} + + protected: + struct SConstructionParams + { + uint32_t size; + uint32_t resources; + }; + static SConstructionParams calc_size(const uint32_t memoryLeft, const uint32_t thisSize, const uint32_t resourceCount) + { + SConstructionParams retval = {.size=std::min(thisSize+resourceCount*sizeof(CTrackedIterator::value_t),memoryLeft)}; + retval.resources = (retval.size-thisSize)/sizeof(CTrackedIterator::value_t); + return retval; } - virtual ~IVariableSizeCommand() + inline IVariableSizeCommandBase(const SConstructionParams& param) : ICommand(param.size), m_next(nullptr) { - std::destroy_n(getVariableCountResources(),m_resourceCount); + reinterpret_cast(static_cast(this)+1)[-1] = param.resources; + std::uninitialized_default_construct_n(getLocalResources(),getLocalResourceCount()); } - inline core::smart_refctd_ptr* getVariableCountResources() { return reinterpret_cast*>(static_cast(this)+1); } - protected: + private: + friend class CTrackedIterator; + friend class CCommandSegmentListPool; + + // methods for the iterator + inline CTrackedIterator::value_t* getLocalResources() + { + return reinterpret_cast(reinterpret_cast(this)+getSize())-getLocalResourceCount(); + } + inline const CTrackedIterator::value_t* getLocalResources() const + { + CTrackedIterator::value_t* retval = const_cast(this)->getLocalResources(); + return retval; + } + + CExtraResourceTrackingBlock* m_next; + }; + template + class IVariableSizeCommand : public IVariableSizeCommandBase + { + public: template - inline IVariableSizeCommand(const Args&... 
args) : ICommand(calc_size(args...)), m_resourceCount(CRTP::calc_resources(args...)) + static SConstructionParams calc_size(const uint32_t memoryLeft, const Args&... args) { - std::uninitialized_default_construct_n(getVariableCountResources(),m_resourceCount); + static_assert(std::is_final_v); + static_assert(alignof(CRTP)>=alignof(CTrackedIterator::value_t)); + return IVariableSizeCommandBase::calc_size(memoryLeft,sizeof(CRTP),CRTP::calc_resources(args...)); } - const uint32_t m_resourceCount; + protected: + template + inline IVariableSizeCommand(const uint32_t memoryLeft, const Args&... args) : IVariableSizeCommandBase(calc_size(memoryLeft,args...)) {} }; class alignas(COMMAND_SEGMENT_ALIGNMENT) CCommandSegment @@ -269,19 +402,20 @@ class IGPUCommandPool : public IBackendObject } m_header; public: - static inline constexpr uint32_t STORAGE_SIZE = COMMAND_SEGMENT_SIZE - core::roundUp(sizeof(header_t), alignof(ICommand)); + static inline constexpr uint32_t STORAGE_SIZE = COMMAND_SEGMENT_SIZE - core::roundUp(sizeof(header_t),alignof(ICommand)); - CCommandSegment(CCommandSegment* prev): + inline CCommandSegment(CCommandSegment* prev): m_header(nullptr, 0u, 0u, alignof(ICommand), STORAGE_SIZE) { - static_assert(alignof(ICommand) == COMMAND_SEGMENT_ALIGNMENT); + static_assert(alignof(ICommand) <= COMMAND_ALIGNMENT); + static_assert(COMMAND_ALIGNMENT <= COMMAND_SEGMENT_ALIGNMENT); wipeNextCommandSize(); if (prev) prev->m_header.next = this; } - ~CCommandSegment() + inline ~CCommandSegment() { for (ICommand* cmd = begin(); cmd != end();) { @@ -297,8 +431,12 @@ class IGPUCommandPool : public IBackendObject template Cmd* allocate(const Args&... 
args) { - const uint32_t cmdSize = Cmd::calc_size(args...); - const auto address = m_header.commandAllocator.alloc_addr(cmdSize, alignof(Cmd)); + uint32_t cmdSize; + if constexpr (std::is_base_of_v) + cmdSize = Cmd::calc_size(args...).size; + else + cmdSize = Cmd::calc_size(args...); + const auto address = m_header.commandAllocator.alloc_addr(cmdSize,alignof(Cmd)); if (address == decltype(m_header.commandAllocator)::invalid_address) return nullptr; @@ -308,6 +446,8 @@ class IGPUCommandPool : public IBackendObject return cmdMem; } + inline uint32_t max_size() const {return m_header.commandAllocator.max_size();} + inline CCommandSegment* getNext() const { return m_header.next; } inline CCommandSegment* getNextHead() const { return m_header.nextHead; } inline CCommandSegment* getPrevHead() const { return m_header.prevHead; } @@ -337,7 +477,7 @@ class IGPUCommandPool : public IBackendObject void wipeNextCommandSize() { const auto nextCmdOffset = m_header.commandAllocator.get_allocated_size(); - const auto wipeEnd = nextCmdOffset + offsetof(IGPUCommandPool::ICommand, m_size) + sizeof(IGPUCommandPool::ICommand::m_size); + const auto wipeEnd = nextCmdOffset + offsetof(IGPUCommandPool::ICommand,m_size) + sizeof(IGPUCommandPool::ICommand::m_size); if (wipeEnd < m_header.commandAllocator.get_total_size()) *(const_cast(&(reinterpret_cast(m_data + nextCmdOffset)->m_size))) = 0; } @@ -345,6 +485,23 @@ class IGPUCommandPool : public IBackendObject static_assert(sizeof(CCommandSegment)==COMMAND_SEGMENT_SIZE); private: + class CExtraResourceTrackingBlock final : public IVariableSizeCommandBase + { + public: + static SConstructionParams calc_size(const uint32_t extraResourceCount) + { + static_assert(alignof(CExtraResourceTrackingBlock)>=alignof(CTrackedIterator::value_t)); + return IVariableSizeCommandBase::calc_size(CCommandSegment::STORAGE_SIZE,sizeof(CExtraResourceTrackingBlock),extraResourceCount); + } + + // this command will always be created at the start of a new segment, 
the whole reason it exists is because previous command has overflown the segment + inline CExtraResourceTrackingBlock(const uint32_t extraResourceCount) : IVariableSizeCommandBase(calc_size(extraResourceCount)) {} + + static inline uint32_t calc_resources(const uint32_t extraResourceCount) + { + return extraResourceCount; + } + }; class CCommandSegmentListPool { public: @@ -354,7 +511,7 @@ class IGPUCommandPool : public IBackendObject CCommandSegment* tail = nullptr; }; - CCommandSegmentListPool() : m_pool(COMMAND_SEGMENTS_PER_BLOCK*COMMAND_SEGMENT_SIZE, 0u, MAX_COMMAND_SEGMENT_BLOCK_COUNT, MIN_POOL_ALLOC_SIZE) {} + inline CCommandSegmentListPool() : m_pool(COMMAND_SEGMENTS_PER_BLOCK*COMMAND_SEGMENT_SIZE, 0u, MAX_COMMAND_SEGMENT_BLOCK_COUNT, MIN_POOL_ALLOC_SIZE) {} template Cmd* emplace(SCommandSegmentList& list, Args&&... args) @@ -362,14 +519,29 @@ class IGPUCommandPool : public IBackendObject if (!list.tail && !appendToList(list)) return nullptr; + constexpr bool IsVariableSize = std::is_base_of_v,Cmd>; + uint32_t resourcesLeft = 0u; + if constexpr (IsVariableSize) + resourcesLeft = Cmd::calc_resources(args...); // not forwarding twice because newCmd() will never be called the second time auto newCmd = [&]() -> Cmd* { - auto cmdMem = list.tail->allocate(args...); + Cmd* cmdMem; + uint32_t unallocatedSize; + if constexpr (IsVariableSize) + { + unallocatedSize = list.tail->max_size(); + cmdMem = list.tail->allocate(unallocatedSize,args...); + } + else + cmdMem = list.tail->allocate(args...); if (!cmdMem) return nullptr; - return new (cmdMem) Cmd(std::forward(args)...); + if constexpr (IsVariableSize) + return new (cmdMem) Cmd(unallocatedSize,std::forward(args)...); + else + return new (cmdMem) Cmd(std::forward(args)...); }; auto cmd = newCmd(); @@ -379,10 +551,21 @@ class IGPUCommandPool : public IBackendObject return nullptr; cmd = newCmd(); - if (!cmd) + assert(cmd); + } + + // now handle segmenting the tracked resources + if constexpr (IsVariableSize) + { + for 
(IVariableSizeCommandBase* prev=cmd; (resourcesLeft-=prev->getLocalResourceCount())!=0u; ) { - assert(false); - return nullptr; + if (!appendToList(list)) + return nullptr; + auto* const mem = list.tail->allocate(resourcesLeft); + assert(mem); + auto* const extra = new (mem) CExtraResourceTrackingBlock(resourcesLeft); + prev->m_next = extra; + prev = extra; } } @@ -398,13 +581,13 @@ class IGPUCommandPool : public IBackendObject if (head == m_head) m_head = head->getNextHead(); - CCommandSegment::linkHeads(head->getPrevHead(), head->getNextHead()); + CCommandSegment::linkHeads(head->getPrevHead(),head->getNextHead()); for (auto& segment = head; segment;) { auto nextSegment = segment->getNext(); segment->~CCommandSegment(); - m_pool.deallocate(segment, COMMAND_SEGMENT_SIZE); + m_pool.deallocate(segment,COMMAND_SEGMENT_SIZE); segment = nextSegment; } } @@ -462,7 +645,7 @@ class IGPUCommandPool : public IBackendObject class IGPUCommandPool::CBindIndexBufferCmd final : public IFixedSizeCommand { public: - CBindIndexBufferCmd(core::smart_refctd_ptr&& indexBuffer) : m_indexBuffer(std::move(indexBuffer)) {} + inline CBindIndexBufferCmd(core::smart_refctd_ptr&& indexBuffer) : m_indexBuffer(std::move(indexBuffer)) {} private: core::smart_refctd_ptr m_indexBuffer; @@ -471,7 +654,7 @@ class IGPUCommandPool::CBindIndexBufferCmd final : public IFixedSizeCommand { public: - CIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} + inline CIndirectCmd(core::smart_refctd_ptr&& buffer) : m_buffer(std::move(buffer)) {} private: core::smart_refctd_ptr m_buffer; @@ -480,7 +663,7 @@ class IGPUCommandPool::CIndirectCmd final : public IFixedSizeCommand { public: - CDrawIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) + inline CDrawIndirectCountCmd(core::smart_refctd_ptr&& buffer, core::smart_refctd_ptr&& countBuffer) : m_buffer(std::move(buffer)), m_countBuffer(std::move(countBuffer)) {} @@ -503,9 +686,10 @@ class 
IGPUCommandPool::CBeginRenderPassCmd final : public IFixedSizeCommand { public: - CPipelineBarrierCmd(const uint32_t bufferCount, const uint32_t imageCount) : IVariableSizeCommand(bufferCount,imageCount) {} + inline CPipelineBarrierCmd(const uint32_t memoryLeft, const uint32_t bufferCount, const uint32_t imageCount) : + IVariableSizeCommand(memoryLeft,bufferCount,imageCount) {} - static uint32_t calc_resources(const uint32_t bufferCount, const uint32_t imageCount) + static inline uint32_t calc_resources(const uint32_t bufferCount, const uint32_t imageCount) { return bufferCount+imageCount; } @@ -514,7 +698,7 @@ class IGPUCommandPool::CPipelineBarrierCmd final : public IVariableSizeCommand { public: - CBindDescriptorSetsCmd(core::smart_refctd_ptr&& pipelineLayout, const uint32_t setCount, const IGPUDescriptorSet* const* const sets) + inline CBindDescriptorSetsCmd(core::smart_refctd_ptr&& pipelineLayout, const uint32_t setCount, const IGPUDescriptorSet* const* const sets) : m_layout(std::move(pipelineLayout)) { for (auto i = 0; i < setCount; ++i) @@ -532,7 +716,7 @@ class IGPUCommandPool::CBindDescriptorSetsCmd final : public IFixedSizeCommand { public: - CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} + inline CBindComputePipelineCmd(core::smart_refctd_ptr&& pipeline) : m_pipeline(std::move(pipeline)) {} private: core::smart_refctd_ptr m_pipeline; @@ -679,7 +863,7 @@ class IGPUCommandPool::CCopyImageToBufferCmd final : public IFixedSizeCommand { public: - CExecuteCommandsCmd(const uint32_t count) : IVariableSizeCommand(count) {} + CExecuteCommandsCmd(const uint32_t memoryLeft, const uint32_t count) : IVariableSizeCommand(memoryLeft,count) {} static uint32_t calc_resources(const uint32_t count) { @@ -690,7 +874,7 @@ class IGPUCommandPool::CExecuteCommandsCmd final : public IVariableSizeCommand { public: - CCustomReferenceCmd(const uint32_t count) : IVariableSizeCommand(count) {} + CCustomReferenceCmd(const uint32_t 
memoryLeft, const uint32_t count) : IVariableSizeCommand(memoryLeft,count) {} static uint32_t calc_resources(const uint32_t count) { @@ -701,22 +885,13 @@ class IGPUCommandPool::CCustomReferenceCmd final : public IVariableSizeCommand { public: - CWaitEventsCmd(const uint32_t eventCount, IEvent *const *const events, const uint32_t totalBufferCount, const uint32_t totalImageCount) - : IVariableSizeCommand(eventCount,events,totalBufferCount,totalImageCount), m_eventCount(eventCount) - { - for (auto i=0u; i(events[i]); - } - - inline core::smart_refctd_ptr* getDeviceMemoryBacked() {return reinterpret_cast*>(getVariableCountResources()+m_eventCount);} + CWaitEventsCmd(const uint32_t memoryLeft, const uint32_t eventCount, const uint32_t totalBufferCount, const uint32_t totalImageCount) + : IVariableSizeCommand(memoryLeft,eventCount,totalBufferCount,totalImageCount) {} - static uint32_t calc_resources(const uint32_t eventCount, const IEvent *const *const, const uint32_t totalBufferCount, const uint32_t totalImageCount) + static uint32_t calc_resources(const uint32_t eventCount, const uint32_t totalBufferCount, const uint32_t totalImageCount) { return eventCount+totalBufferCount+totalImageCount; } - - private: - const uint32_t m_eventCount; }; class IGPUCommandPool::CCopyImageCmd final : public IFixedSizeCommand @@ -790,9 +965,9 @@ class IGPUCommandPool::CWriteAccelerationStructurePropertiesCmd final : public I // If we take queryPool as rvalue ref here (core::smart_refctd_ptr&&), in calc_size it will become const core::smart_refctd_ptr // because calc_size takes its arguments by const ref (https://github.com/Devsh-Graphics-Programming/Nabla/blob/04fcae3029772cbc739ccf6ba80f72e6e12f54e8/include/nbl/video/IGPUCommandPool.h#L76) // , that means we will not be able to pass a core::smart_refctd_ptr when emplacing the command. So instead, we take a raw pointer and create refctd pointers here. 
- CWriteAccelerationStructurePropertiesCmd(const IQueryPool* queryPool, const uint32_t accelerationStructureCount) - : IVariableSizeCommand(queryPool,accelerationStructureCount), m_queryPool(core::smart_refctd_ptr(queryPool)) - {} + CWriteAccelerationStructurePropertiesCmd(const uint32_t memoryLeft, const IQueryPool* queryPool, const uint32_t accelerationStructureCount) + : IVariableSizeCommand(memoryLeft,queryPool,accelerationStructureCount), + m_queryPool(core::smart_refctd_ptr(queryPool)) {} static uint32_t calc_resources(const IQueryPool* queryPool, const uint32_t accelerationStructureCount) { @@ -806,7 +981,7 @@ class IGPUCommandPool::CWriteAccelerationStructurePropertiesCmd final : public I class IGPUCommandPool::CBuildAccelerationStructuresCmd final : public IVariableSizeCommand { public: - inline CBuildAccelerationStructuresCmd(const uint32_t resourceCount) : IVariableSizeCommand(resourceCount) {} + inline CBuildAccelerationStructuresCmd(const uint32_t memoryLeft, const uint32_t resourceCount) : IVariableSizeCommand(memoryLeft,resourceCount) {} static inline uint32_t calc_resources(const uint32_t resourceCount) { diff --git a/src/nbl/video/IGPUCommandBuffer.cpp b/src/nbl/video/IGPUCommandBuffer.cpp index f47428aae8..7ec56d2b93 100644 --- a/src/nbl/video/IGPUCommandBuffer.cpp +++ b/src/nbl/video/IGPUCommandBuffer.cpp @@ -337,16 +337,17 @@ bool IGPUCommandBuffer::waitEvents(const std::span events, const SEvent totalImageCount += depInfo.imgBarriers.size(); } - auto* cmd = m_cmdpool->m_commandListPool.emplace(m_commandList,events.size(),events.data(),totalBufferCount,totalImageCount); + auto* cmd = m_cmdpool->m_commandListPool.emplace(m_commandList,static_cast(events.size()),totalBufferCount,totalImageCount); if (!cmd) { NBL_LOG_ERROR("out of host memory!"); return false; } - auto outIt = cmd->getDeviceMemoryBacked(); + IGPUCommandPool::CTrackedIterator outIt(cmd); for (auto i=0u; i(events[i]); const auto& depInfo = depInfos[i]; for (const auto& barrier : 
depInfo.bufBarriers) *(outIt++) = barrier.range.buffer; @@ -453,7 +454,7 @@ bool IGPUCommandBuffer::pipelineBarrier(const core::bitflaggetVariableCountResources(); + IGPUCommandPool::CTrackedIterator outIt(cmd); for (const auto& barrier : depInfo.bufBarriers) *(outIt++) = barrier.range.buffer; for (const auto& barrier : depInfo.imgBarriers) @@ -804,18 +805,15 @@ uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::spangetVariableCountResources(); + auto oit = IGPUCommandPool::CTrackedIterator(cmd); if (indirectBuffer) *(oit++) = core::smart_refctd_ptr(indirectBuffer); for (const auto& info : infos) { - oit = info.fillTracking(oit); - // we still need to clear the BLAS tracking list if the TLAS has nothing to track + // we still need to clear the BLAS tracking list if the TLAS has nothing to track, so add even if trackedBLASes.empty() if constexpr (std::is_same_v) - { - const auto blasCount = info.trackedBLASes.size(); - m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit-blasCount,blasCount},.dst=info.dstAS}); - } + m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.srcBegin=oit,.count=static_cast(info.trackedBLASes.size()),.dst=info.dstAS}); + oit = info.fillTracking(oit); } return totalGeometries; @@ -918,11 +916,11 @@ bool IGPUCommandBuffer::copyAccelerationStructureFromMemory(const AccelerationSt const bool retval = copyAccelerationStructureFromMemory_impl(copyInfo.src,copyInfo.dst); if constexpr (std::is_same_v) { - const auto size = copyInfo.trackedBLASes.size(); + const uint32_t size = copyInfo.trackedBLASes.size(); auto oit = reserveReferences(size); if (oit) { - m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.src={oit,size},.dst=copyInfo.dst}); + m_TLASTrackingOps.emplace_back(TLASTrackingWrite{.srcBegin=oit,.count=size,.dst=copyInfo.dst}); for (const auto& blas : copyInfo.trackedBLASes) *(oit++) = core::smart_refctd_ptr(blas); } @@ -1353,7 +1351,7 @@ bool IGPUCommandBuffer::writeAccelerationStructureProperties(const 
std::spangetVariableCountResources(); + auto oit = IGPUCommandPool::CTrackedIterator(cmd); for (const auto* as : pAccelerationStructures) *(oit++) = core::smart_refctd_ptr(as); m_noCommands = false; @@ -2057,13 +2055,14 @@ bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer* NBL_LOG_ERROR("out of host memory!"); return false; } + auto oit = IGPUCommandPool::CTrackedIterator(cmd); for (auto i=0u; igetVariableCountResources()[i] = core::smart_refctd_ptr(cmdbufs[i]); + *(oit++) = core::smart_refctd_ptr(cmdbufs[i]); m_noCommands = false; return executeCommands_impl(count,cmdbufs); } -core::smart_refctd_ptr* IGPUCommandBuffer::reserveReferences(const uint32_t size) +IGPUCommandPool::CTrackedIterator IGPUCommandBuffer::reserveReferences(const uint32_t size) { if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT|queue_flags_t::SPARSE_BINDING_BIT)) return nullptr; @@ -2074,7 +2073,7 @@ core::smart_refctd_ptr* IGPUCommandBuffer::reserv NBL_LOG_ERROR("out of host memory!"); return nullptr; } - return cmd->getVariableCountResources(); + return IGPUCommandPool::CTrackedIterator(cmd); } } \ No newline at end of file diff --git a/src/nbl/video/IQueue.cpp b/src/nbl/video/IQueue.cpp index 108f76183c..70acecffca 100644 --- a/src/nbl/video/IQueue.cpp +++ b/src/nbl/video/IQueue.cpp @@ -177,9 +177,7 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info) case 0: { const IGPUCommandBuffer::TLASTrackingWrite& op = std::get<0>(var); - - using iterator = decltype(op.src)::iterator; - m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = op.dst->pushTrackedBLASes({op.src.begin()},{op.src.end()}); + m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = op.dst->pushTrackedBLASes({.orig=op.srcBegin},op.count); break; } case 1: @@ -192,8 +190,7 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info) // stop multiple threads messing with us 
std::lock_guard lk(op.src->m_trackingLock); const auto* pSrcBLASes = op.src->getPendingBuildTrackedBLASes(ver); - const std::span emptySpan = {}; - m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = pSrcBLASes ? op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->end()):op.dst->pushTrackedBLASes(emptySpan.begin(),emptySpan.end()); + m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = pSrcBLASes ? op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->size()):op.dst->pushTrackedBLASes(nullptr,0); break; } case 2: From f1493eda9386c57282b5debe22a81a5e2674b013 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 20 Jan 2026 15:56:23 +0700 Subject: [PATCH 38/54] changes to linear, bilinear, box muller for pdf and backward pdf --- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 77 +++++----- .../hlsl/sampling/box_muller_transform.hlsl | 33 ++-- include/nbl/builtin/hlsl/sampling/linear.hlsl | 62 ++++---- .../projected_spherical_triangle.hlsl | 144 +++++++++--------- 4 files changed, 159 insertions(+), 157 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 2b6282eb8d..7006e63852 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -19,50 +19,53 @@ namespace sampling template struct Bilinear { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; - // BijectiveSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using 
sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; - static Bilinear create(const vector4_type bilinearCoeffs) - { - Bilinear retval; - retval.bilinearCoeffs = bilinearCoeffs; - retval.twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); - return retval; - } + static Bilinear create(const vector4_type bilinearCoeffs) + { + Bilinear retval; + retval.bilinearCoeffs = bilinearCoeffs; + retval.bilinearCoeffDiffs = vector2_type(bilinearCoeffs[2]-bilinearCoeffs[0], bilinearCoeffs[3]-bilinearCoeffs[1]); + vector2_type twiceAreasUnderXCurve = vector2_type(bilinearCoeffs[0] + bilinearCoeffs[1], bilinearCoeffs[2] + bilinearCoeffs[3]); + retval.twiceAreasUnderXCurveSumOverFour = scalar_type(4.0) / (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]); + retval.lineary = Linear::create(twiceAreasUnderXCurve); + return retval; + } - vector2_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type _u) - { - vector2_type u; - Linear lineary = Linear::create(twiceAreasUnderXCurve); - u.y = lineary.generate(_u.y); + vector2_type generate(const vector2_type _u) + { + vector2_type u; + u.y = lineary.generate(_u.y); - const vector2_type ySliceEndPoints = vector2_type(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[2], u.y), nbl::hlsl::mix(bilinearCoeffs[1], bilinearCoeffs[3], u.y)); - Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(_u.x); + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + u.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + u.y * bilinearCoeffDiffs[1]); + Linear linearx = Linear::create(ySliceEndPoints); + u.x = linearx.generate(_u.x); - rcpPdf = (twiceAreasUnderXCurve[0] + twiceAreasUnderXCurve[1]) / (4.0 * nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x)); + return u; + } - return u; - } + scalar_type backwardPdf(const vector2_type u) + { + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] 
+ u.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + u.y * bilinearCoeffDiffs[1]); + return nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x) * fourOverTwiceAreasUnderXCurveSum; + } - scalar_type pdf(const vector2_type u) - { - return 4.0 * nbl::hlsl::mix(nbl::hlsl::mix(bilinearCoeffs[0], bilinearCoeffs[1], u.x), nbl::hlsl::mix(bilinearCoeffs[2], bilinearCoeffs[3], u.x), u.y) / (bilinearCoeffs[0] + bilinearCoeffs[1] + bilinearCoeffs[2] + bilinearCoeffs[3]); - } - - // unit square: x0y0 x1y0 - // x0y1 x1y1 - vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) - vector2_type twiceAreasUnderXCurve; + // unit square: x0y0 x1y0 + // x0y1 x1y1 + vector4_type bilinearCoeffs; // (x0y0, x0y1, x1y0, x1y1) + vector2_type bilinearCoeffDiffs; + vector2_type fourOverTwiceAreasUnderXCurveSum; + Linear lineary; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl index 4dd774c8ba..9f76f06576 100644 --- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl +++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl @@ -19,23 +19,22 @@ namespace sampling template) struct BoxMullerTransform { - using scalar_type = T; - using vector2_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - vector2_type operator()(const vector2_type xi) - { - scalar_type sinPhi, cosPhi; - math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); - return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(xi.x)) * stddev; - } - - T stddev; + using scalar_type = T; + using vector2_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + 
vector2_type backwardPdf(const vector2_type outPos) + { + const vector2_type outPos2 = outPos * outPos; + return vector2_type(nbl::hlsl::exp(scalar_type(-0.5) * (outPos2.x + outPos2.y)), numbers::pi * scalar_type(0.5) * hlsl::atan2(outPos.y, outPos.x)); + } + + T stddev; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 16f583bbbf..1c12aeea29 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -19,36 +19,38 @@ namespace sampling template struct Linear { - using scalar_type = T; - using vector2_type = vector; - - // BijectiveSampler concept types - using domain_type = scalar_type; - using codomain_type = scalar_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; - - static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end) - { - Linear retval; - retval.linearCoeffStart = linearCoeffs[0]; - retval.rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); - vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; - retval.squaredCoeffStart = squaredCoeffs[0]; - retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; - return retval; - } - - scalar_type generate(const scalar_type u) - { - return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); - } - - scalar_type linearCoeffStart; - scalar_type rcpDiff; - scalar_type squaredCoeffStart; - scalar_type squaredCoeffDiff; + using scalar_type = T; + using vector2_type = vector; + + // BijectiveSampler concept types + using domain_type = scalar_type; + using codomain_type = scalar_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; + + static Linear create(const vector2_type 
linearCoeffs) // start and end importance values (start, end), assumed to be at x=0 and x=1 + { + Linear retval; + scalar_type rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); + retval.linearCoeffStartOverDiff = linearCoeffs[0] * rcpDiff; + vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; + scalar_type squaredRcpDiff = rcpDiff * rcpDiff; + retval.squaredCoeffStartOverDiff = squaredCoeffs[0] * squaredRcpDiff; + retval.squaredCoeffDiffOverDiff = (squaredCoeffs[1] - squaredCoeffs[0]) * squaredRcpDiff; + return retval; + } + + scalar_type generate(const scalar_type u) + { + return hlsl::mix(u, (linearCoeffStartOverDiff - hlsl::sqrt(squaredCoeffStartOverDiff + u * squaredCoeffDiffOverDiff)), hlsl::abs(linearCoeffStartOverDiff) < numeric_limits::max); + } + + // TODO: add forwardPdf and backwardPdf methods, forward computes from u and backwards from the result of generate + + scalar_type linearCoeffStartOverDiff; + scalar_type squaredCoeffStartOverDiff; + scalar_type squaredCoeffDiffOverDiff; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index eeb48ea388..63926c9df4 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -22,79 +22,77 @@ namespace sampling template struct ProjectedSphericalTriangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector3_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - ProjectedSphericalTriangle retval; - retval.tri = tri; - return retval; - } - - vector4_type computeBilinearPatch(const 
vector3_type receiverNormal, bool isBSDF) - { - const scalar_type minimumProjSolidAngle = 0.0; - - matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); - - return bxdfPdfAtVertex.yyxz; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) - { - vector2_type u; - // pre-warp according to proj solid angle approximation - vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); - Bilinear bilinear = Bilinear::create(patch); - u = bilinear.generate(rcpPdf, _u); - - // now warp the points onto a spherical triangle - const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - rcpPdf *= solidAngle; - - return L; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); - } - - scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) - { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - - vector4_type patch = 
computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); - } - - scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) - { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, L); - - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.pdf(u); - } - - shapes::SphericalTriangle tri; - sampling::SphericalTriangle sphtri; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + ProjectedSphericalTriangle retval; + retval.tri = tri; + return retval; + } + + vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) + { + const scalar_type minimumProjSolidAngle = 0.0; + + matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); + + return bxdfPdfAtVertex.yyxz; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) + { + vector2_type u; + // pre-warp according to proj solid angle approximation + vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); + Bilinear bilinear = Bilinear::create(patch); + u = bilinear.generate(_u); + + // now warp the points onto a spherical triangle 
+ const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); + rcpPdf = solidAngle / bilinear.backwardPdf(u); + + return L; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) + { + scalar_type cos_a, cos_c, csc_b, csc_c; + vector3_type cos_vertices, sin_vertices; + const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); + return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); + } + + scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) + { + scalar_type pdf; + const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); + Bilinear bilinear = Bilinear::create(patch); + return pdf * bilinear.backwardPdf(u); + } + + scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) + { + scalar_type pdf; + const vector2_type u = sphtri.generateInverse(pdf, L); + vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); + Bilinear bilinear = Bilinear::create(patch); + return pdf * bilinear.backwardPdf(u); + } + + shapes::SphericalTriangle tri; + sampling::SphericalTriangle sphtri; }; } // namespace sampling From 5933fe0478e1eb1b2729a0a814139d22645a4cbd Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 21 Jan 2026 15:08:09 +0700 Subject: [PATCH 39/54] changes to solid angle method name, simplified a lot of code in spherical triangle --- .../projected_spherical_triangle.hlsl | 7 +- .../hlsl/sampling/spherical_triangle.hlsl | 204 
+++++++++--------- .../hlsl/shapes/spherical_rectangle.hlsl | 4 +- .../hlsl/shapes/spherical_triangle.hlsl | 58 ++--- 4 files changed, 133 insertions(+), 140 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 63926c9df4..0952ed423a 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -67,9 +67,12 @@ struct ProjectedSphericalTriangle vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) { - scalar_type cos_a, cos_c, csc_b, csc_c; + const scalar_type cos_a = tri.cos_sides[0]; + const scalar_type cos_c = tri.cos_sides[2]; + const scalar_type csc_b = tri.csc_sides[1]; + const scalar_type csc_c = tri.csc_sides[2]; vector3_type cos_vertices, sin_vertices; - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); + const scalar_type solidAngle = tri.solidAngle(cos_vertices, sin_vertices); return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); } diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 5d9d32ad21..430c8ccd0d 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -22,105 +22,111 @@ namespace sampling template struct SphericalTriangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - - // BijectiveSampler concept types - using domain_type = vector2_type; - using codomain_type = vector3_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - using inverse_sample_type = domain_and_rcpPdf; - - static SphericalTriangle 
create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - SphericalTriangle retval; - retval.tri = tri; - return retval; - } - - // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) - { - scalar_type negSinSubSolidAngle, negCosSubSolidAngle; - math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); - - const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; - const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; - - // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful - scalar_type u_ = q - cos_vertices[0]; - scalar_type v_ = p + sin_vertices[0] * cos_c; - - // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors - vector3_type C_s = tri.vertex0; - if (csc_b < numeric_limits::max) - { - const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); - if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion::slerp_delta(tri.vertex0, tri.vertex2 * csc_b, cosAngleAlongAC); - } - - vector3_type retval = tri.vertex1; - const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertex1); - const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); - if (csc_b_s < numeric_limits::max) - { - const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); - if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion::slerp_delta(tri.vertex1, C_s * csc_b_s, cosAngleAlongBC_s); - } - return retval; - } - - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const 
vector2_type u) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - rcpPdf = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) - { - pdf = 1.0 / solidAngle; - - const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertex1); - const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); - const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertex0); - - const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; - const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); - - const scalar_type cosC_ = sin_vertices[0] * sinB_ * cos_c - cos_vertices[0] * cosB_; - const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); - - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); - angle_adder.addAngle(cosB_, sinB_); - angle_adder.addAngle(cosC_, sinC_); - const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi)*pdf; - const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; - - const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); - const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? 
cosBC_s : cos_c)); - - return vector2_type(u, v); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) - { - scalar_type cos_a, cos_c, csc_b, csc_c; - vector3_type cos_vertices, sin_vertices; - - const scalar_type solidAngle = tri.solidAngleOfTriangle(cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c); - - return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - } - - shapes::SphericalTriangle tri; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + + // BijectiveSampler concept types + using domain_type = vector2_type; + using codomain_type = vector3_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + using inverse_sample_type = domain_and_rcpPdf; + + static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) + { + SphericalTriangle retval; + retval.tri = tri; + return retval; + } + + // WARNING: can and will return NAN if one or three of the triangle edges are near zero length + vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) + { + scalar_type negSinSubSolidAngle,negCosSubSolidAngle; + math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); + + const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; + const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; + + // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful + scalar_type u_ = q - cos_vertices[0]; + scalar_type v_ = p + sin_vertices[0] * cos_c; + + // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors + vector3_type C_s = 
tri.vertices[0]; + if (csc_b < numeric_limits::max) + { + const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); + if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) + C_s += math::quaternion::slerp_delta(tri.vertices[0], tri.vertices[2] * csc_b, cosAngleAlongAC); + } + + vector3_type retval = tri.vertices[1]; + const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertices[1]); + const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); + if (csc_b_s < numeric_limits::max) + { + const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); + if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) + retval += math::quaternion::slerp_delta(tri.vertices[1], C_s * csc_b_s, cosAngleAlongBC_s); + } + return retval; + } + + vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) + { + const scalar_type cos_a = tri.cos_sides[0]; + const scalar_type cos_c = tri.cos_sides[2]; + const scalar_type csc_b = tri.csc_sides[1]; + const scalar_type csc_c = tri.csc_sides[2]; + vector3_type cos_vertices, sin_vertices; + + rcpPdf = tri.solidAngle(cos_vertices, sin_vertices); + + return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); + } + + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) + { + pdf = 1.0 / solidAngle; + + const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertices[1]); + const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); + const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertices[0]); + + const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; + const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); + + const scalar_type cosC_ = sin_vertices[0] * 
sinB_* cos_c - cos_vertices[0] * cosB_; + const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); + + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); + angle_adder.addAngle(cosB_, sinB_); + angle_adder.addAngle(cosC_, sinC_); + const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) * pdf; + const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; + + const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); + const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : cos_c)); + + return vector2_type(u,v); + } + + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) + { + const scalar_type cos_a = tri.cos_sides[0]; + const scalar_type cos_c = tri.cos_sides[2]; + const scalar_type csc_b = tri.csc_sides[1]; + const scalar_type csc_c = tri.csc_sides[2]; + vector3_type cos_vertices, sin_vertices; + + const scalar_type solidAngle = tri.solidAngle(cos_vertices, sin_vertices); + + return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + } + + shapes::SphericalTriangle tri; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 11442bef7c..587e221996 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -22,7 +22,6 @@ struct SphericalRectangle { using scalar_type = Scalar; using vector3_type = vector; - using vector4_type = vector; using matrix3x3_type = matrix; static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) @@ -40,8 +39,9 @@ struct SphericalRectangle return retval; } - scalar_type solidAngleOfRectangle(const vector rectangleExtents) + 
scalar_type solidAngle(const vector rectangleExtents) { + using vector4_type = vector; const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( diff --git a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index f574b106ce..028d3e3653 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -25,36 +25,29 @@ struct SphericalTriangle using scalar_type = T; using vector3_type = vector; - static SphericalTriangle create(const vector3_type vertex0, const vector3_type vertex1, const vector3_type vertex2, const vector3_type origin) + static SphericalTriangle create(const vector3_type vertices[3], const vector3_type origin) { SphericalTriangle retval; - retval.vertex0 = nbl::hlsl::normalize(vertex0 - origin); - retval.vertex1 = nbl::hlsl::normalize(vertex1 - origin); - retval.vertex2 = nbl::hlsl::normalize(vertex2 - origin); - retval.cos_sides = vector3_type(hlsl::dot(retval.vertex1, retval.vertex2), hlsl::dot(retval.vertex2, retval.vertex0), hlsl::dot(retval.vertex0, retval.vertex1)); - const vector3_type csc_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; - retval.csc_sides.x = hlsl::rsqrt(csc_sides2.x); - retval.csc_sides.y = hlsl::rsqrt(csc_sides2.y); - retval.csc_sides.z = hlsl::rsqrt(csc_sides2.z); + retval.vertices[0] = nbl::hlsl::normalize(vertices[0] - origin); + retval.vertices[1] = nbl::hlsl::normalize(vertices[1] - origin); + retval.vertices[2] = nbl::hlsl::normalize(vertices[2] - origin); + retval.cos_sides = vector3_type(hlsl::dot(retval.vertices[1], retval.vertices[2]), hlsl::dot(retval.vertices[2], retval.vertices[0]), hlsl::dot(retval.vertices[0], retval.vertices[1])); + const vector3_type 
sin_sides2 = hlsl::promote(1.0) - retval.cos_sides * retval.cos_sides; + retval.csc_sides = hlsl::rsqrt(sin_sides2); return retval; } + // checks if any angles are small enough to disregard bool pyramidAngles() { - return hlsl::any >(csc_sides >= (vector3_type)(numeric_limits::max)); + return hlsl::any >(csc_sides >= hlsl::promote(numeric_limits::max)); } - scalar_type solidAngleOfTriangle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices, NBL_REF_ARG(scalar_type) cos_a, NBL_REF_ARG(scalar_type) cos_c, NBL_REF_ARG(scalar_type) csc_b, NBL_REF_ARG(scalar_type) csc_c) + scalar_type solidAngle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices) { if (pyramidAngles()) return 0.f; - // these variables might eventually get optimized out - cos_a = cos_sides[0]; - cos_c = cos_sides[2]; - csc_b = csc_sides[1]; - csc_c = csc_sides[2]; - // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
sin_vertices = hlsl::sqrt(hlsl::promote(1.0) - cos_vertices * cos_vertices); @@ -65,39 +58,30 @@ struct SphericalTriangle return angle_adder.getSumofArccos() - numbers::pi; } - scalar_type solidAngleOfTriangle() + scalar_type solidAngle() { vector3_type dummy0,dummy1; - scalar_type dummy2,dummy3,dummy4,dummy5; - return solidAngleOfTriangle(dummy0,dummy1,dummy2,dummy3,dummy4,dummy5); + return solidAngle(dummy0,dummy1); } - scalar_type projectedSolidAngleOfTriangle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) + scalar_type projectedSolidAngle(const vector3_type receiverNormal, NBL_REF_ARG(vector3_type) cos_sides, NBL_REF_ARG(vector3_type) csc_sides, NBL_REF_ARG(vector3_type) cos_vertices) { if (pyramidAngles()) return 0.f; - vector3_type awayFromEdgePlane0 = hlsl::cross(vertex1, vertex2) * csc_sides[0]; - vector3_type awayFromEdgePlane1 = hlsl::cross(vertex2, vertex0) * csc_sides[1]; - vector3_type awayFromEdgePlane2 = hlsl::cross(vertex0, vertex1) * csc_sides[2]; - - // useless here but could be useful somewhere else - cos_vertices[0] = hlsl::dot(awayFromEdgePlane1, awayFromEdgePlane2); - cos_vertices[1] = hlsl::dot(awayFromEdgePlane2, awayFromEdgePlane0); - cos_vertices[2] = hlsl::dot(awayFromEdgePlane0, awayFromEdgePlane1); - // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); - matrix awayFromEdgePlane = matrix(awayFromEdgePlane0, awayFromEdgePlane1, awayFromEdgePlane2); + matrix awayFromEdgePlane; + awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * csc_sides[0]; + awayFromEdgePlane[1] = 
hlsl::cross(vertices[2], vertices[0]) * csc_sides[1]; + awayFromEdgePlane[2] = hlsl::cross(vertices[0], vertices[1]) * csc_sides[2]; const vector3_type externalProducts = hlsl::abs(hlsl::mul(/* transposed already */awayFromEdgePlane, receiverNormal)); - const vector3_type pyramidAngles = acos(cos_sides); - return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); + const vector3_type pyramidAngles = hlsl::acos(cos_sides); + return hlsl::dot(pyramidAngles, externalProducts) / (2.f * numbers::pi); } - vector3_type vertex0; - vector3_type vertex1; - vector3_type vertex2; + vector3_type vertices[3]; vector3_type cos_sides; vector3_type csc_sides; }; From 3ac7b834adc0ef1e91312c0a2b88bfb28da4263e Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 22 Jan 2026 11:35:14 +0700 Subject: [PATCH 40/54] removed redundant/unused variables from spherical triangle sample --- .../hlsl/sampling/spherical_triangle.hlsl | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 430c8ccd0d..191f187649 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -37,27 +37,30 @@ struct SphericalTriangle { SphericalTriangle retval; retval.tri = tri; + vector3_type cos_vertices, sin_vertices; + retval.solidAngle = tri.solidAngle(cos_vertices, sin_vertices); + retval.cosA = cos_vertices[0]; + retval.sinA = sin_vertices[0]; return retval; } - // WARNING: can and will return NAN if one or three of the triangle edges are near zero length - vector3_type generate(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector2_type u) + vector3_type generate(scalar_type cos_c, scalar_type csc_b, const vector2_type u) { scalar_type 
negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); - const scalar_type p = negCosSubSolidAngle * sin_vertices[0] - negSinSubSolidAngle * cos_vertices[0]; - const scalar_type q = -negSinSubSolidAngle * sin_vertices[0] - negCosSubSolidAngle * cos_vertices[0]; + const scalar_type p = negCosSubSolidAngle * sinA - negSinSubSolidAngle * cosA; + const scalar_type q = -negSinSubSolidAngle * sinA - negCosSubSolidAngle * cosA; // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful - scalar_type u_ = q - cos_vertices[0]; - scalar_type v_ = p + sin_vertices[0] * cos_c; + scalar_type u_ = q - cosA; + scalar_type v_ = p + sinA * cos_c; // the slerps could probably be optimized by sidestepping `normalize` calls and accumulating scaling factors vector3_type C_s = tri.vertices[0]; if (csc_b < numeric_limits::max) { - const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cos_vertices[0] - v_) / ((v_ * p + u_ * q) * sin_vertices[0]); + const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cosA - v_) / ((v_ * p + u_ * q) * sinA); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) C_s += math::quaternion::slerp_delta(tri.vertices[0], tri.vertices[2] * csc_b, cosAngleAlongAC); } @@ -76,18 +79,15 @@ struct SphericalTriangle vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) { - const scalar_type cos_a = tri.cos_sides[0]; const scalar_type cos_c = tri.cos_sides[2]; const scalar_type csc_b = tri.csc_sides[1]; - const scalar_type csc_c = tri.csc_sides[2]; - vector3_type cos_vertices, sin_vertices; - rcpPdf = tri.solidAngle(cos_vertices, sin_vertices); + rcpPdf = solidAngle; - return generate(rcpPdf, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); + return generate(cos_c, csc_b, u); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type 
sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type L) + vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type cos_c, scalar_type csc_c, const vector3_type L) { pdf = 1.0 / solidAngle; @@ -98,16 +98,16 @@ struct SphericalTriangle const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 - cosB_ * cosB_); - const scalar_type cosC_ = sin_vertices[0] * sinB_* cos_c - cos_vertices[0] * cosB_; + const scalar_type cosC_ = sinA * sinB_* cos_c - cosA * cosB_; const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosA, sinA); angle_adder.addAngle(cosB_, sinB_); angle_adder.addAngle(cosC_, sinC_); const scalar_type subTriSolidAngleRatio = (angle_adder.getSumofArccos() - numbers::pi) * pdf; const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; - const scalar_type cosBC_s = (cos_vertices[0] + cosB_ * cosC_) / (sinB_ * sinC_); + const scalar_type cosBC_s = (cosA + cosB_ * cosC_) / (sinB_ * sinC_); const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? 
cosBC_s : cos_c)); return vector2_type(u,v); @@ -115,18 +115,16 @@ struct SphericalTriangle vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) { - const scalar_type cos_a = tri.cos_sides[0]; const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_b = tri.csc_sides[1]; const scalar_type csc_c = tri.csc_sides[2]; - vector3_type cos_vertices, sin_vertices; - - const scalar_type solidAngle = tri.solidAngle(cos_vertices, sin_vertices); - return generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); + return generateInverse(pdf, cos_c, csc_c, L); } shapes::SphericalTriangle tri; + scalar_type solidAngle; + scalar_type cosA; + scalar_type sinA; }; } // namespace sampling From 4ed1cbc937ae538771288bb8c92349f662edf1ce Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 22 Jan 2026 12:24:57 +0700 Subject: [PATCH 41/54] spherical rectangle stores origin, extent, basis and takes observer instead --- .../hlsl/sampling/spherical_rectangle.hlsl | 134 +++++++++--------- .../hlsl/shapes/spherical_rectangle.hlsl | 25 ++-- 2 files changed, 81 insertions(+), 78 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 8f90be6b3a..a157ff0d8c 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -21,72 +21,74 @@ namespace sampling template struct SphericalRectangle { - using scalar_type = T; - using vector2_type = vector; - using vector3_type = vector; - using vector4_type = vector; - - // ResamplableSampler concept types - using domain_type = vector2_type; - using codomain_type = vector2_type; - using density_type = scalar_type; - using sample_type = codomain_and_rcpPdf; - - static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) - { - SphericalRectangle retval; - retval.rect = rect; - return retval; 
- } - - vector2_type generate(const vector2_type rectangleExtents, const vector2_type uv, NBL_REF_ARG(scalar_type) S) - { - const vector4_type denorm_n_z = vector4_type(-rect.r0.y, rect.r0.x + rectangleExtents.x, rect.r0.y + rectangleExtents.y, -rect.r0.x); - const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(rect.r0.z * rect.r0.z) + denorm_n_z * denorm_n_z); - const vector4_type cosGamma = vector4_type( - -n_z[0] * n_z[1], - -n_z[1] * n_z[2], - -n_z[2] * n_z[3], - -n_z[3] * n_z[0]); - - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); - angle_adder.addCosine(cosGamma[1]); - scalar_type p = angle_adder.getSumofArccos(); - angle_adder = math::sincos_accumulator::create(cosGamma[2]); - angle_adder.addCosine(cosGamma[3]); - scalar_type q = angle_adder.getSumofArccos(); - - const scalar_type k = scalar_type(2.0) * numbers::pi - q; - const scalar_type b0 = n_z[0]; - const scalar_type b1 = n_z[2]; - S = p + q - scalar_type(2.0) * numbers::pi; - - const scalar_type CLAMP_EPS = 1e-5; - - // flip z axis if rect.r0.z > 0 - rect.r0.z = ieee754::flipSignIfRHSNegative(rect.r0.z, -rect.r0.z); - vector3_type r1 = rect.r0 + vector3_type(rectangleExtents.x, rectangleExtents.y, 0); - - const scalar_type au = uv.x * S + k; - const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); - const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] - const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); - - scalar_type xu = -(cu * rect.r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); - xu = hlsl::clamp(xu, rect.r0.x, r1.x); // avoid Infs - const scalar_type d_2 = xu * xu + rect.r0.z * rect.r0.z; - const scalar_type d = hlsl::sqrt(d_2); - - const scalar_type h0 = rect.r0.y / hlsl::sqrt(d_2 + rect.r0.y * rect.r0.y); - const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + uv.y * (h1 - h0); - const scalar_type hv2 = hv * hv; - 
const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); - - return vector2_type((xu - rect.r0.x) / rectangleExtents.x, (yv - rect.r0.y) / rectangleExtents.y); - } - - shapes::SphericalRectangle rect; + using scalar_type = T; + using vector2_type = vector; + using vector3_type = vector; + using vector4_type = vector; + + // ResamplableSampler concept types + using domain_type = vector2_type; + using codomain_type = vector2_type; + using density_type = scalar_type; + using sample_type = codomain_and_rcpPdf; + + static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) + { + SphericalRectangle retval; + retval.rect = rect; + return retval; + } + + vector2_type generate(const vector3_type observer, const vector2_type uv, NBL_REF_ARG(scalar_type) S) + { + vector3_type r0 = hlsl::mul(rect.basis, rect.origin - observer); + const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rect.extents.x, r0.y + rect.extents.y, -r0.x); + const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(r0.z * r0.z) + denorm_n_z * denorm_n_z); + const vector4_type cosGamma = vector4_type( + -n_z[0] * n_z[1], + -n_z[1] * n_z[2], + -n_z[2] * n_z[3], + -n_z[3] * n_z[0] + ); + + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); + angle_adder.addCosine(cosGamma[1]); + scalar_type p = angle_adder.getSumofArccos(); + angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); + + const scalar_type k = scalar_type(2.0) * numbers::pi - q; + const scalar_type b0 = n_z[0]; + const scalar_type b1 = n_z[2]; + S = p + q - scalar_type(2.0) * numbers::pi; + + const scalar_type CLAMP_EPS = 1e-5; + + // flip z axis if r0.z > 0 + r0.z = ieee754::flipSignIfRHSNegative(r0.z, -r0.z); + vector3_type r1 = r0 + vector3_type(rect.extents.x, rect.extents.y, 0); + + const scalar_type au = uv.x * S + 
k; + const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); + const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] + const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); + + scalar_type xu = -(cu * r0.z) / hlsl::sqrt(scalar_type(1.0) - cu * cu); + xu = hlsl::clamp(xu, r0.x, r1.x); // avoid Infs + const scalar_type d_2 = xu * xu + r0.z * r0.z; + const scalar_type d = hlsl::sqrt(d_2); + + const scalar_type h0 = r0.y / hlsl::sqrt(d_2 + r0.y * r0.y); + const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); + const scalar_type hv = h0 + uv.y * (h1 - h0); + const scalar_type hv2 = hv * hv; + const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); + + return vector2_type((xu - r0.x) / rect.extents.x, (yv - r0.y) / rect.extents.y); + } + + shapes::SphericalRectangle rect; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 587e221996..60c2729f21 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -21,28 +21,27 @@ template struct SphericalRectangle { using scalar_type = Scalar; + using vector2_type = vector; using vector3_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const matrix3x3_type basis) + static SphericalRectangle create(const vector3_type rectangleOrigin, const vector3_type right, const vector3_type up) { SphericalRectangle retval; - retval.r0 = nbl::hlsl::mul(basis, rectangleOrigin - observer); + retval.origin = rectangleOrigin; + retval.extents = vector2_type(hlsl::length(right), hlsl::length(up)); + retval.basis[0] = right / retval.extents[0]; + retval.basis[1] = up / retval.extents[1]; + retval.basis[2] = 
hlsl::normalize(hlsl::cross(retval.basis[0], retval.basis[1])); return retval; } - static SphericalRectangle create(const vector3_type observer, const vector3_type rectangleOrigin, const vector3_type T, vector3_type B, const vector3_type N) + scalar_type solidAngle(const vector3_type observer) { - SphericalRectangle retval; - matrix3x3_type TBN = nbl::hlsl::transpose(matrix3x3_type(T, B, N)); - retval.r0 = nbl::hlsl::mul(TBN, rectangleOrigin - observer); - return retval; - } + const vector3_type r0 = hlsl::mul(basis, origin - observer); - scalar_type solidAngle(const vector rectangleExtents) - { using vector4_type = vector; - const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rectangleExtents.x, r0.y + rectangleExtents.y, -r0.x); + const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + extents.x, r0.y + extents.y, -r0.x); const vector4_type n_z = denorm_n_z / nbl::hlsl::sqrt((vector4_type)(r0.z * r0.z) + denorm_n_z * denorm_n_z); const vector4_type cosGamma = vector4_type( -n_z[0] * n_z[1], @@ -57,7 +56,9 @@ struct SphericalRectangle return angle_adder.getSumofArccos() - scalar_type(2.0) * numbers::pi; } - vector3_type r0; + vector3_type origin; + vector2_type extents; + matrix3x3_type basis; }; } From 65ef4b3687c143d4183aaefd3cd62fbeacc36232 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Thu, 22 Jan 2026 14:28:42 +0700 Subject: [PATCH 42/54] added compressed spherical rectangle, comments for info of implementation --- .../hlsl/shapes/spherical_rectangle.hlsl | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 60c2729f21..5e23774640 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -17,6 +17,40 @@ namespace hlsl namespace shapes { +// What are we likely to do with a Spherical Rectangle? 
+// 1) Initialize it multiple times from different observers +// 2) Sample it repeatedly + +// How are we likely to get a spherical rect? +// 1) from OBB matrix (with a model space z-axis scale thats irrelevant - but should be forced to 1.f to not mess with distance) +// 2) in a compressed form + +// So, to bring multiple world-space observers into Spherical Rectangle's own space, we need the basis matrix. +// The matrix should be a matrix where the last column is the translation, a 3x3 matrix with a pre-transform translation (worldSpace rectangle origin to be subtracted). + +// You can compute it from an OBB matrix (as given by/to imguizmo to position a [0,1]^2 rectangle mesh where Z+ is the front face. + +// Now, can apply translation: +// 1) post-rotation so a it automatically gets added during a affine pseudo-mul of a 3x4, so pseudo_mul(basis,observer) +// 2) pre-rotation so you keep a worldspace rectangle origin and subtract it before, e.g. mul(basis,worldSpaceOrigin-observer) - this one is possibly better due to next point + +// So we need to store: +// 1) first two COLUMNS of the original OBB matrix (rows of 3x3 basis matrix with the scale still in there), thats kinda your right and up vectors +// 2) pre-rotation translation / the world-space translation of the rectangle +// Theoretically you could get away with not storing one of the up vector components but its not always the same component you can reconstruct (plane orthogonal to up isn't always the XY plane). +// Could compress up vector as a rotation of the default vector orthogonal to right as given by the frisvad-basis function around the right vector plus a scale +// but that becomes a very expensive decompression step involving a quaternion with uniform scale. 
+ +template +struct CompressedSphericalRectangle +{ + using vector3_type = vector; + + vector3_type origin; + vector3_type right; + vector3_type up; +}; + template struct SphericalRectangle { @@ -36,6 +70,11 @@ struct SphericalRectangle return retval; } + static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) + { + return create(compressed.origin, compressed.right, compressed.up); + } + scalar_type solidAngle(const vector3_type observer) { const vector3_type r0 = hlsl::mul(basis, origin - observer); From 7fc828192e7e124a40fe3306ab01a2092d649592 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 25 Feb 2026 11:36:36 +0700 Subject: [PATCH 43/54] minor fixes to spherical rectangle stuff --- .../hlsl/sampling/spherical_rectangle.hlsl | 2 +- .../hlsl/shapes/spherical_rectangle.hlsl | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index a157ff0d8c..04534c919d 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -66,7 +66,7 @@ struct SphericalRectangle const scalar_type CLAMP_EPS = 1e-5; // flip z axis if r0.z > 0 - r0.z = ieee754::flipSignIfRHSNegative(r0.z, -r0.z); + r0.z = -hlsl::abs(r0.z); vector3_type r1 = r0 + vector3_type(rect.extents.x, rect.extents.y, 0); const scalar_type au = uv.x * S + k; diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 5e23774640..3890d1a2db 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -30,6 +30,31 @@ namespace shapes // You can compute it from an OBB matrix (as given by/to imguizmo to position a [0,1]^2 rectangle mesh where Z+ is the front face. 
+/* +matrix check = mul(modelSpace,transpose(modelSpace)); +// orthogonality (don't need to check the other 3 lower half numbers, cause MM^T is symmetric) +assert(check[0][1]==0.f); +assert(check[0][2]==0.f); +assert(check[1][2]==0.f); +// the scales are squared +const vector2_type scalesSq = vector2_type(check[0][0],check[1][1]); +const vector2_type scalesRcp = rsqrt(scalesSq); +// only rotation, scale needs to be thrown away +basis = transpose(modelSpace); +// right now `mul(basis,fromObserver)` will apply extent scales on the dot product +// need to remove that +basis[0] *= scalesRcp[0]; +basis[1] *= scalesRcp[1]; +// but also back it up so we know the size of the original rectangle +extents = promote(vector2_type>(1.f)/scalesRcp; +if (dontAssertZScaleIsOne) + basis[2] *= rsqrt(check[2][2]); +else +{ + assert(check[2][2]==1.f); +} +*/ + // Now, can apply translation: // 1) post-rotation so a it automatically gets added during a affine pseudo-mul of a 3x4, so pseudo_mul(basis,observer) // 2) pre-rotation so you keep a worldspace rectangle origin and subtract it before, e.g. 
mul(basis,worldSpaceOrigin-observer) - this one is possibly better due to next point From 855dac402e5c09d7c96b07e44e88c5d366526e70 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 2 Mar 2026 15:47:56 +0700 Subject: [PATCH 44/54] spherical rectangle constructor for same rectangle and observer --- .../hlsl/sampling/spherical_rectangle.hlsl | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 04534c919d..22e3b02397 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -32,25 +32,34 @@ struct SphericalRectangle using density_type = scalar_type; using sample_type = codomain_and_rcpPdf; - static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect) - { - SphericalRectangle retval; - retval.rect = rect; - return retval; - } + NBL_CONSTEXPR_STATIC_INLINE scalar_type ClampEps = 1e-5; - vector2_type generate(const vector3_type observer, const vector2_type uv, NBL_REF_ARG(scalar_type) S) + static SphericalRectangle create(NBL_CONST_REF_ARG(shapes::SphericalRectangle) rect, const vector3_type observer) { - vector3_type r0 = hlsl::mul(rect.basis, rect.origin - observer); - const vector4_type denorm_n_z = vector4_type(-r0.y, r0.x + rect.extents.x, r0.y + rect.extents.y, -r0.x); - const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(r0.z * r0.z) + denorm_n_z * denorm_n_z); - const vector4_type cosGamma = vector4_type( + SphericalRectangle retval; + + retval.r0 = hlsl::mul(rect.basis, rect.origin - observer); + const vector4_type denorm_n_z = vector4_type(-retval.r0.y, retval.r0.x + rect.extents.x, retval.r0.y + rect.extents.y, -retval.r0.x); + const vector4_type n_z = denorm_n_z / hlsl::sqrt(hlsl::promote(retval.r0.z * retval.r0.z) + denorm_n_z * denorm_n_z); + retval.cosGamma = vector4_type( -n_z[0] 
* n_z[1], -n_z[1] * n_z[2], -n_z[2] * n_z[3], -n_z[3] * n_z[0] ); + // flip z axis if r0.z > 0 (negate only the z component, r0.x and r0.y are still needed by r1 and generate) + retval.r0.z = -hlsl::abs(retval.r0.z); + retval.r1 = retval.r0 + vector3_type(rect.extents.x, rect.extents.y, 0); + retval.extents = rect.extents; + + retval.b0 = n_z[0]; + retval.b1 = n_z[2]; + return retval; + } + + vector2_type generate(const vector2_type uv, NBL_REF_ARG(scalar_type) S) + { math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); angle_adder.addCosine(cosGamma[1]); scalar_type p = angle_adder.getSumofArccos(); @@ -59,16 +68,8 @@ struct SphericalRectangle scalar_type q = angle_adder.getSumofArccos(); const scalar_type k = scalar_type(2.0) * numbers::pi - q; - const scalar_type b0 = n_z[0]; - const scalar_type b1 = n_z[2]; S = p + q - scalar_type(2.0) * numbers::pi; - const scalar_type CLAMP_EPS = 1e-5; - - // flip z axis if r0.z > 0 - r0.z = -hlsl::abs(r0.z); - vector3_type r1 = r0 + vector3_type(rect.extents.x, rect.extents.y, 0); - const scalar_type au = uv.x * S + k; const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] @@ -83,12 +84,17 @@ struct SphericalRectangle const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); const scalar_type hv = h0 + uv.y * (h1 - h0); const scalar_type hv2 = hv * hv; - const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - CLAMP_EPS); + const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - ClampEps); - return vector2_type((xu - r0.x) / rect.extents.x, (yv - r0.y) / rect.extents.y); + return vector2_type((xu - r0.x) / extents.x, (yv - r0.y) / extents.y); } - shapes::SphericalRectangle rect; + vector4_type cosGamma; + scalar_type b0; + scalar_type b1; + vector3_type r0; + vector3_type r1; + vector2_type extents; }; } // namespace sampling From 
468031f6e4cc7cc5623e4824e892d53b2a749d45 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 2 Mar 2026 16:07:07 +0700 Subject: [PATCH 45/54] spherical rectangle create only from compressed, minor fix for spherical tri --- .../builtin/hlsl/shapes/spherical_rectangle.hlsl | 16 ++++++---------- .../builtin/hlsl/shapes/spherical_triangle.hlsl | 10 ++++++++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl index 3890d1a2db..9743049a60 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_rectangle.hlsl @@ -84,22 +84,18 @@ struct SphericalRectangle using vector3_type = vector; using matrix3x3_type = matrix; - static SphericalRectangle create(const vector3_type rectangleOrigin, const vector3_type right, const vector3_type up) + static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) { SphericalRectangle retval; - retval.origin = rectangleOrigin; - retval.extents = vector2_type(hlsl::length(right), hlsl::length(up)); - retval.basis[0] = right / retval.extents[0]; - retval.basis[1] = up / retval.extents[1]; + retval.origin = compressed.origin; + retval.extents = vector2_type(hlsl::length(compressed.right), hlsl::length(compressed.up)); + retval.basis[0] = compressed.right / retval.extents[0]; + retval.basis[1] = compressed.up / retval.extents[1]; + assert(hlsl::dot(retval.basis[0], retval.basis[1]) > scalar_type(0.0)); retval.basis[2] = hlsl::normalize(hlsl::cross(retval.basis[0], retval.basis[1])); return retval; } - static SphericalRectangle create(NBL_CONST_REF_ARG(CompressedSphericalRectangle) compressed) - { - return create(compressed.origin, compressed.right, compressed.up); - } - scalar_type solidAngle(const vector3_type observer) { const vector3_type r0 = hlsl::mul(basis, origin - observer); diff --git 
a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl index 028d3e3653..118f022640 100644 --- a/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/shapes/spherical_triangle.hlsl @@ -43,13 +43,19 @@ struct SphericalTriangle return hlsl::any >(csc_sides >= hlsl::promote(numeric_limits::max)); } + vector3_type __getCosVertices() + { + // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) + return hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + } + scalar_type solidAngle(NBL_REF_ARG(vector3_type) cos_vertices, NBL_REF_ARG(vector3_type) sin_vertices) { if (pyramidAngles()) return 0.f; // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?) 
+ cos_vertices = __getCosVertices(); sin_vertices = hlsl::sqrt(hlsl::promote(1.0) - cos_vertices * cos_vertices); math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cos_vertices[0], sin_vertices[0]); @@ -69,7 +75,7 @@ struct SphericalTriangle if (pyramidAngles()) return 0.f; - cos_vertices = hlsl::clamp((cos_sides - cos_sides.yzx * cos_sides.zxy) * csc_sides.yzx * csc_sides.zxy, hlsl::promote(-1.0), hlsl::promote(1.0)); + cos_vertices = __getCosVertices(); matrix awayFromEdgePlane; awayFromEdgePlane[0] = hlsl::cross(vertices[1], vertices[2]) * csc_sides[0]; From 0f143a0c8f7de607dfec24983359d7e00e781893 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 2 Mar 2026 16:40:57 +0700 Subject: [PATCH 46/54] reduced duplicate methods to only ones matching (close to) concept in projected/spherical triangle --- .../projected_spherical_triangle.hlsl | 56 +++++-------------- .../hlsl/sampling/spherical_triangle.hlsl | 25 +++------ 2 files changed, 21 insertions(+), 60 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 0952ed423a..87a3fa4044 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,69 +33,39 @@ struct ProjectedSphericalTriangle using density_type = scalar_type; using sample_type = codomain_and_rcpPdf; - static ProjectedSphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) - { - ProjectedSphericalTriangle retval; - retval.tri = tri; - return retval; - } - - vector4_type computeBilinearPatch(const vector3_type receiverNormal, bool isBSDF) + Bilinear bilinear computeBilinearPatch() { const scalar_type minimumProjSolidAngle = 0.0; - matrix m = matrix(tri.vertex0, tri.vertex1, tri.vertex2); - const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(isBSDF, nbl::hlsl::mul(m, receiverNormal), 
hlsl::promote(minimumProjSolidAngle)); + matrix m = matrix(sphtri.tri.vertices[0], sphtri.tri.vertices[1], sphtri.tri.vertices[2]); + const vector3_type bxdfPdfAtVertex = math::conditionalAbsOrMax(receiverWasBSDF, hlsl::mul(m, receiverNormal), hlsl::promote(minimumProjSolidAngle)); - return bxdfPdfAtVertex.yyxz; + return Bilinear::create(bxdfPdfAtVertex.yyxz); } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const vector3_type receiverNormal, bool isBSDF, const vector2_type _u) + vector3_type generate(const vector2_type u) { vector2_type u; // pre-warp according to proj solid angle approximation - vector4_type patch = computeBilinearPatch(receiverNormal, isBSDF); - Bilinear bilinear = Bilinear::create(patch); + Bilinear bilinear = computeBilinearPatch(); u = bilinear.generate(_u); // now warp the points onto a spherical triangle - const vector3_type L = sphtri.generate(solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, u); - rcpPdf = solidAngle / bilinear.backwardPdf(u); - + const vector3_type L = sphtri.generate(u); return L; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector3_type receiverNormal, bool isBSDF, const vector2_type u) - { - const scalar_type cos_a = tri.cos_sides[0]; - const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_b = tri.csc_sides[1]; - const scalar_type csc_c = tri.csc_sides[2]; - vector3_type cos_vertices, sin_vertices; - const scalar_type solidAngle = tri.solidAngle(cos_vertices, sin_vertices); - return generate(rcpPdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, receiverNormal, isBSDF, u); - } - - scalar_type pdf(scalar_type solidAngle, const vector3_type cos_vertices, const vector3_type sin_vertices, scalar_type cos_a, scalar_type cos_c, scalar_type csc_b, scalar_type csc_c, const 
vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) - { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, solidAngle, cos_vertices, sin_vertices, cos_a, cos_c, csc_b, csc_c, L); - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); - return pdf * bilinear.backwardPdf(u); - } - - scalar_type pdf(const vector3_type receiverNormal, bool receiverWasBSDF, const vector3_type L) + scalar_type backwardPdf(const vector3_type L) { - scalar_type pdf; - const vector2_type u = sphtri.generateInverse(pdf, L); - vector4_type patch = computeBilinearPatch(receiverNormal, receiverWasBSDF); - Bilinear bilinear = Bilinear::create(patch); + const scalar_type pdf = sphtri.backwardPdf(L); + const vector2_type u = sphtri.generateInverse(L); + Bilinear bilinear = computeBilinearPatch(); return pdf * bilinear.backwardPdf(u); } - shapes::SphericalTriangle tri; sampling::SphericalTriangle sphtri; + vector3_type receiverNormal; + bool receiverWasBSDF; }; } // namespace sampling diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 191f187649..82b171545b 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -44,8 +44,11 @@ struct SphericalTriangle return retval; } - vector3_type generate(scalar_type cos_c, scalar_type csc_b, const vector2_type u) + vector3_type generate(const vector2_type u) { + const scalar_type cos_c = tri.cos_sides[2]; + const scalar_type csc_b = tri.csc_sides[1]; + scalar_type negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); @@ -77,19 +80,10 @@ struct SphericalTriangle return retval; } - vector3_type generate(NBL_REF_ARG(scalar_type) rcpPdf, const vector2_type u) + vector2_type generateInverse(const vector3_type L) { 
const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_b = tri.csc_sides[1]; - - rcpPdf = solidAngle; - - return generate(cos_c, csc_b, u); - } - - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, scalar_type cos_c, scalar_type csc_c, const vector3_type L) - { - pdf = 1.0 / solidAngle; + const scalar_type csc_c = tri.csc_sides[2]; const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertices[1]); const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); @@ -113,12 +107,9 @@ struct SphericalTriangle return vector2_type(u,v); } - vector2_type generateInverse(NBL_REF_ARG(scalar_type) pdf, const vector3_type L) + scalar_type backwardPdf(const vector3_type L) { - const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_c = tri.csc_sides[2]; - - return generateInverse(pdf, cos_c, csc_c, L); + return scalar_type(1.0) / solidAngle; } shapes::SphericalTriangle tri; From fb0e8a5d1686e8f4943016fde8677e52babe15e4 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Mon, 2 Mar 2026 16:58:28 +0700 Subject: [PATCH 47/54] spherical rect generate don't divide by extents, let user do that --- include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 22e3b02397..26f3ca667b 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -51,7 +51,6 @@ struct SphericalRectangle // flip z axis if r0.z > 0 retval.r0 = -hlsl::abs(retval.r0.z); retval.r1 = retval.r0 + vector3_type(rect.extents.x, rect.extents.y, 0); - retval.extents = rect.extents; retval.b0 = n_z[0]; retval.b1 = n_z[2]; @@ -86,7 +85,7 @@ struct SphericalRectangle const scalar_type hv2 = hv * hv; const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 
< scalar_type(1.0) - ClampEps); - return vector2_type((xu - r0.x) / extents.x, (yv - r0.y) / extents.y); + return vector2_type((xu - r0.x), (yv - r0.y)); } vector4_type cosGamma; @@ -94,7 +93,6 @@ struct SphericalRectangle scalar_type b1; vector3_type r0; vector3_type r1; - vector2_type extents; }; } // namespace sampling From ab5ee786b001e4b8af6fd236dcfc965d0d9af557 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 3 Mar 2026 11:15:53 +0700 Subject: [PATCH 48/54] store only needed members from tri --- .../hlsl/sampling/spherical_triangle.hlsl | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index 82b171545b..c1ed6be599 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -36,19 +36,19 @@ struct SphericalTriangle static SphericalTriangle create(NBL_CONST_REF_ARG(shapes::SphericalTriangle) tri) { SphericalTriangle retval; - retval.tri = tri; vector3_type cos_vertices, sin_vertices; retval.solidAngle = tri.solidAngle(cos_vertices, sin_vertices); retval.cosA = cos_vertices[0]; retval.sinA = sin_vertices[0]; + retval.tri_vertices = tri.vertices; + retval.triCosC = tri.cos_sides[2]; + retval.triCscB = tri.csc_sides[1]; + retval.triCscC = tri.csc_sides[2]; return retval; } vector3_type generate(const vector2_type u) { - const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_b = tri.csc_sides[1]; - scalar_type negSinSubSolidAngle,negCosSubSolidAngle; math::sincos(solidAngle * u.x - numbers::pi, negSinSubSolidAngle, negCosSubSolidAngle); @@ -57,42 +57,39 @@ struct SphericalTriangle // TODO: we could optimize everything up and including to the first slerp, because precision here is just godawful scalar_type u_ = q - cosA; - scalar_type v_ = p + sinA * cos_c; + scalar_type v_ = p + sinA * triCosC; // the slerps could 
probably be optimized by sidestepping `normalize` calls and accumulating scaling factors - vector3_type C_s = tri.vertices[0]; - if (csc_b < numeric_limits::max) + vector3_type C_s = tri_vertices[0]; + if (triCscB < numeric_limits::max) { const scalar_type cosAngleAlongAC = ((v_ * q - u_ * p) * cosA - v_) / ((v_ * p + u_ * q) * sinA); if (nbl::hlsl::abs(cosAngleAlongAC) < 1.f) - C_s += math::quaternion::slerp_delta(tri.vertices[0], tri.vertices[2] * csc_b, cosAngleAlongAC); + C_s += math::quaternion::slerp_delta(tri_vertices[0], tri_vertices[2] * triCscB, cosAngleAlongAC); } - vector3_type retval = tri.vertices[1]; - const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri.vertices[1]); + vector3_type retval = tri_vertices[1]; + const scalar_type cosBC_s = nbl::hlsl::dot(C_s, tri_vertices[1]); const scalar_type csc_b_s = 1.0 / nbl::hlsl::sqrt(1.0 - cosBC_s * cosBC_s); if (csc_b_s < numeric_limits::max) { const scalar_type cosAngleAlongBC_s = nbl::hlsl::clamp(1.0 + cosBC_s * u.y - u.y, -1.f, 1.f); if (nbl::hlsl::abs(cosAngleAlongBC_s) < 1.f) - retval += math::quaternion::slerp_delta(tri.vertices[1], C_s * csc_b_s, cosAngleAlongBC_s); + retval += math::quaternion::slerp_delta(tri_vertices[1], C_s * csc_b_s, cosAngleAlongBC_s); } return retval; } vector2_type generateInverse(const vector3_type L) { - const scalar_type cos_c = tri.cos_sides[2]; - const scalar_type csc_c = tri.csc_sides[2]; - - const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri.vertices[1]); + const scalar_type cosAngleAlongBC_s = nbl::hlsl::dot(L, tri_vertices[1]); const scalar_type csc_a_ = 1.0 / nbl::hlsl::sqrt(1.0 - cosAngleAlongBC_s * cosAngleAlongBC_s); - const scalar_type cos_b_ = nbl::hlsl::dot(L, tri.vertices[0]); + const scalar_type cos_b_ = nbl::hlsl::dot(L, tri_vertices[0]); - const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * cos_c) * csc_a_ * csc_c; + const scalar_type cosB_ = (cos_b_ - cosAngleAlongBC_s * triCosC) * csc_a_ * triCscC; const scalar_type sinB_ = nbl::hlsl::sqrt(1.0 
- cosB_ * cosB_); - const scalar_type cosC_ = sinA * sinB_* cos_c - cosA * cosB_; + const scalar_type cosC_ = sinA * sinB_* triCosC - cosA * cosB_; const scalar_type sinC_ = nbl::hlsl::sqrt(1.0 - cosC_ * cosC_); math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosA, sinA); @@ -102,7 +99,7 @@ struct SphericalTriangle const scalar_type u = subTriSolidAngleRatio > numeric_limits::min ? subTriSolidAngleRatio : 0.0; const scalar_type cosBC_s = (cosA + cosB_ * cosC_) / (sinB_ * sinC_); - const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : cos_c)); + const scalar_type v = (1.0 - cosAngleAlongBC_s) / (1.0 - (cosBC_s < bit_cast(0x3f7fffff) ? cosBC_s : triCosC)); return vector2_type(u,v); } @@ -112,10 +109,14 @@ struct SphericalTriangle return scalar_type(1.0) / solidAngle; } - shapes::SphericalTriangle tri; scalar_type solidAngle; scalar_type cosA; scalar_type sinA; + + vector3_type tri_vertices[3]; + scalar_type triCosC; + scalar_type triCscB; + scalar_type triCscC; }; } // namespace sampling From 17c85ba927a9f16fe89d232135c535eabeb87d4b Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 3 Mar 2026 15:07:06 +0700 Subject: [PATCH 49/54] forward/backward pdfs for spherical triangle/rectangle, projected sph tri to match concepts --- .../projected_spherical_triangle.hlsl | 9 ++++- .../hlsl/sampling/spherical_rectangle.hlsl | 34 ++++++++++++++----- .../hlsl/sampling/spherical_triangle.hlsl | 5 +++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl index 87a3fa4044..5fba1df2d7 100644 --- a/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/projected_spherical_triangle.hlsl @@ -33,7 +33,7 @@ struct ProjectedSphericalTriangle using density_type = scalar_type; using sample_type = codomain_and_rcpPdf; - 
Bilinear bilinear computeBilinearPatch() + Bilinear computeBilinearPatch() { const scalar_type minimumProjSolidAngle = 0.0; @@ -55,6 +55,13 @@ struct ProjectedSphericalTriangle return L; } + scalar_type forwardPdf(const vector2_type u) + { + const scalar_type pdf = sphtri.forwardPdf(u); + Bilinear bilinear = computeBilinearPatch(); + return pdf * bilinear.backwardPdf(u); + } + scalar_type backwardPdf(const vector3_type L) { const scalar_type pdf = sphtri.backwardPdf(L); diff --git a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl index 26f3ca667b..c80406a8f8 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_rectangle.hlsl @@ -48,6 +48,16 @@ struct SphericalRectangle -n_z[3] * n_z[0] ); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); + angle_adder.addCosine(cosGamma[1]); + scalar_type p = angle_adder.getSumofArccos(); + angle_adder = math::sincos_accumulator::create(cosGamma[2]); + angle_adder.addCosine(cosGamma[3]); + scalar_type q = angle_adder.getSumofArccos(); + + const scalar_type k = scalar_type(2.0) * numbers::pi - q; + retval.solidAngle = p + q - scalar_type(2.0) * numbers::pi; + // flip z axis if r0.z > 0 retval.r0 = -hlsl::abs(retval.r0.z); retval.r1 = retval.r0 + vector3_type(rect.extents.x, rect.extents.y, 0); @@ -57,19 +67,14 @@ struct SphericalRectangle return retval; } - vector2_type generate(const vector2_type uv, NBL_REF_ARG(scalar_type) S) + vector2_type generate(const vector2_type u) { - math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[0]); - angle_adder.addCosine(cosGamma[1]); - scalar_type p = angle_adder.getSumofArccos(); - angle_adder = math::sincos_accumulator::create(cosGamma[2]); + math::sincos_accumulator angle_adder = math::sincos_accumulator::create(cosGamma[2]); angle_adder.addCosine(cosGamma[3]); scalar_type q = 
angle_adder.getSumofArccos(); - const scalar_type k = scalar_type(2.0) * numbers::pi - q; - S = p + q - scalar_type(2.0) * numbers::pi; - const scalar_type au = uv.x * S + k; + const scalar_type au = u.x * solidAngle + k; const scalar_type fu = (hlsl::cos(au) * b0 - b1) / hlsl::sin(au); const scalar_type cu_2 = hlsl::max(fu * fu + b0 * b0, 1.f); // forces `cu` to be in [-1,1] const scalar_type cu = ieee754::flipSignIfRHSNegative(scalar_type(1.0) / hlsl::sqrt(cu_2), fu); @@ -81,13 +86,24 @@ struct SphericalRectangle const scalar_type h0 = r0.y / hlsl::sqrt(d_2 + r0.y * r0.y); const scalar_type h1 = r1.y / hlsl::sqrt(d_2 + r1.y * r1.y); - const scalar_type hv = h0 + uv.y * (h1 - h0); + const scalar_type hv = h0 + u.y * (h1 - h0); const scalar_type hv2 = hv * hv; const scalar_type yv = hlsl::mix(r1.y, (hv * d) / hlsl::sqrt(scalar_type(1.0) - hv2), hv2 < scalar_type(1.0) - ClampEps); return vector2_type((xu - r0.x), (yv - r0.y)); } + scalar_type forwardPdf(const vector2_type u) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type backwardPdf(const vector2_type L) + { + return scalar_type(1.0) / solidAngle; + } + + scalar_type solidAngle; vector4_type cosGamma; scalar_type b0; scalar_type b1; diff --git a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl index c1ed6be599..83cde18a96 100644 --- a/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl +++ b/include/nbl/builtin/hlsl/sampling/spherical_triangle.hlsl @@ -104,6 +104,11 @@ struct SphericalTriangle return vector2_type(u,v); } + scalar_type forwardPdf(const vector2_type u) + { + return scalar_type(1.0) / solidAngle; + } + scalar_type backwardPdf(const vector3_type L) { return scalar_type(1.0) / solidAngle; From d95cfa72cedb0dd6e0dbf9e0c83e601c36a21285 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Tue, 3 Mar 2026 16:41:00 +0700 Subject: [PATCH 50/54] copied over fixed linear sampling because merge fucked up, added forward/backward 
pdfs + inverse generate --- include/nbl/builtin/hlsl/sampling/linear.hlsl | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/linear.hlsl b/include/nbl/builtin/hlsl/sampling/linear.hlsl index 1c12aeea29..78c57a53bb 100644 --- a/include/nbl/builtin/hlsl/sampling/linear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/linear.hlsl @@ -32,25 +32,44 @@ struct Linear static Linear create(const vector2_type linearCoeffs) // start and end importance values (start, end), assumed to be at x=0 and x=1 { Linear retval; - scalar_type rcpDiff = 1.0 / (linearCoeffs[0] - linearCoeffs[1]); - retval.linearCoeffStartOverDiff = linearCoeffs[0] * rcpDiff; + retval.linearCoeffStart = linearCoeffs[0]; + retval.linearCoeffDiff = linearCoeffs[1] - linearCoeffs[0]; + retval.rcpCoeffSum = scalar_type(1.0) / (linearCoeffs[0] + linearCoeffs[1]); + retval.rcpDiff = -scalar_type(1.0) / retval.linearCoeffDiff; vector2_type squaredCoeffs = linearCoeffs * linearCoeffs; - scalar_type squaredRcpDiff = rcpDiff * rcpDiff; - retval.squaredCoeffStartOverDiff = squaredCoeffs[0] * squaredRcpDiff; - retval.squaredCoeffDiffOverDiff = (squaredCoeffs[1] - squaredCoeffs[0]) * squaredRcpDiff; + retval.squaredCoeffStart = squaredCoeffs[0]; + retval.squaredCoeffDiff = squaredCoeffs[1] - squaredCoeffs[0]; return retval; } scalar_type generate(const scalar_type u) { - return hlsl::mix(u, (linearCoeffStartOverDiff - hlsl::sqrt(squaredCoeffStartOverDiff + u * squaredCoeffDiffOverDiff)), hlsl::abs(linearCoeffStartOverDiff) < numeric_limits::max); + return hlsl::mix(u, (linearCoeffStart - hlsl::sqrt(squaredCoeffStart + u * squaredCoeffDiff)) * rcpDiff, hlsl::abs(rcpDiff) < numeric_limits::max); } - // TODO: add forwardPdf and backwardPdf methods, forward computes from u and backwards from the result of generate + scalar_type generateInverse(const scalar_type x) + { + return x * (scalar_type(2.0) * linearCoeffStart + linearCoeffDiff * x) * rcpCoeffSum; + } 
+ + scalar_type forwardPdf(const scalar_type u) + { + return backwardPdf(generate(u)); + } + + scalar_type backwardPdf(const scalar_type x) + { + if (x < scalar_type(0.0) || x > scalar_type(1.0)) + return scalar_type(0.0); + return scalar_type(2.0) * (linearCoeffStart + x * linearCoeffDiff) * rcpCoeffSum; + } - scalar_type linearCoeffStartOverDiff; - scalar_type squaredCoeffStartOverDiff; - scalar_type squaredCoeffDiffOverDiff; + scalar_type linearCoeffStart; + scalar_type linearCoeffDiff; + scalar_type rcpCoeffSum; + scalar_type rcpDiff; + scalar_type squaredCoeffStart; + scalar_type squaredCoeffDiff; }; } // namespace sampling From 0bb7b39f90a203cc2abb51bb1ebf8d690d95c712 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 4 Mar 2026 11:12:05 +0700 Subject: [PATCH 51/54] add forward pdf, generate inverse to bilinear --- .../nbl/builtin/hlsl/sampling/bilinear.hlsl | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl index 7006e63852..af84e49544 100644 --- a/include/nbl/builtin/hlsl/sampling/bilinear.hlsl +++ b/include/nbl/builtin/hlsl/sampling/bilinear.hlsl @@ -42,22 +42,38 @@ struct Bilinear return retval; } - vector2_type generate(const vector2_type _u) + vector2_type generate(const vector2_type u) { - vector2_type u; - u.y = lineary.generate(_u.y); + vector2_type p; + p.y = lineary.generate(u.y); + + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + Linear linearx = Linear::create(ySliceEndPoints); + p.x = linearx.generate(u.x); + + return p; + } - const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + u.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + u.y * bilinearCoeffDiffs[1]); + vector2_type generateInverse(const vector2_type p) + { + vector2_type u; + const vector2_type ySliceEndPoints = 
vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); Linear linearx = Linear::create(ySliceEndPoints); - u.x = linearx.generate(_u.x); + u.x = linearx.generateInverse(p.x); + u.y = lineary.generateInverse(p.y); return u; } - scalar_type backwardPdf(const vector2_type u) + scalar_type forwardPdf(const vector2_type u) + { + return backwardPdf(generate(u)); + } + + scalar_type backwardPdf(const vector2_type p) { - const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + u.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + u.y * bilinearCoeffDiffs[1]); - return nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], u.x) * fourOverTwiceAreasUnderXCurveSum; + const vector2_type ySliceEndPoints = vector2_type(bilinearCoeffs[0] + p.y * bilinearCoeffDiffs[0], bilinearCoeffs[1] + p.y * bilinearCoeffDiffs[1]); + return nbl::hlsl::mix(ySliceEndPoints[0], ySliceEndPoints[1], p.x) * fourOverTwiceAreasUnderXCurveSum; } // unit square: x0y0 x1y0 From 89f6d5f043c8080202b0bcf9cf7fa0f953e24512 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 4 Mar 2026 14:22:33 +0700 Subject: [PATCH 52/54] uniform hemi/sphere samplign make static methods private, added methods to match concept --- .../hlsl/sampling/uniform_spheres.hlsl | 64 +++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl index c92d732b43..6f3200f4d9 100644 --- a/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/uniform_spheres.hlsl @@ -32,7 +32,7 @@ struct UniformHemisphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { T z = _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -40,11 +40,39 @@ struct 
UniformHemisphere return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); } - static T pdf() + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2(_sample.z, phi / twopi); + } + + vector_t2 generateInverse(const vector_t3 _sample) + { + return __generateInverse(_sample); + } + + static scalar_type __pdf() { return T(1.0) / (T(2.0) * numbers::pi); } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(); + } + + scalar_type backwardPdf(const vector_t3 _sample) + { + return __pdf(); + } + template > static ::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf() { @@ -66,7 +94,7 @@ struct UniformSphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { T z = T(1.0) - T(2.0) * _sample.x; T r = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - z * z)); @@ -74,11 +102,39 @@ struct UniformSphere return vector_t3(r * hlsl::cos(phi), r * hlsl::sin(phi), z); } - static T pdf() + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 _sample) + { + T phi = hlsl::atan2(_sample.y, _sample.x); + const T twopi = T(2.0) * numbers::pi; + phi += hlsl::mix(T(0.0), twopi, phi < T(0.0)); + return vector_t2((T(1.0) - _sample.z) * T(0.5), phi / twopi); + } + + vector_t2 generateInverse(const vector_t3 _sample) + { + return __generateInverse(_sample); + } + + static T __pdf() { return T(1.0) / (T(4.0) * numbers::pi); } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(); + } + + scalar_type backwardPdf(const vector_t3 _sample) + { + return __pdf(); + } + template > static 
::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf() { From e07ebc1247cebf5e3f5b85393b794949a3f8b4d8 Mon Sep 17 00:00:00 2001 From: keptsecret Date: Wed, 4 Mar 2026 15:43:14 +0700 Subject: [PATCH 53/54] cosine hemi/sphere sampling make static methods private, added methods to match concept (inverse is sketchy), invert concentric mapping --- .../hlsl/sampling/concentric_mapping.hlsl | 37 +++++++++++ .../hlsl/sampling/cos_weighted_spheres.hlsl | 64 +++++++++++++++++-- 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl index 4d80e14861..342b754c5a 100644 --- a/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl +++ b/include/nbl/builtin/hlsl/sampling/concentric_mapping.hlsl @@ -46,6 +46,43 @@ vector concentricMapping(const vector _u) return p; } +template +vector invertConcentricMapping(const vector p) +{ + T theta = hlsl::atan2(p.y, p.x); // -pi -> pi + T r = hlsl::sqrt(p.x * p.x + p.y * p.y); + const T PiOver4 = T(0.25) * numbers::pi; + + vector u; + // TODO: should reduce branching somehow? 
+ if (hlsl::abs(theta) < PiOver4 || hlsl::abs(theta) > 3 * PiOver4) + { + r = ieee754::copySign(r, p.x); + u.x = r; + if (p.x < 0) { + if (p.y < 0) { + u.y = (numbers::pi + theta) * r / PiOver4; + } else { + u.y = (theta - numbers::pi) * r / PiOver4; + } + } else { + u.y = (theta * r) / PiOver4; + } + } + else + { + r = ieee754::copySign(r, p.y); + u.y = r; + if (p.y < 0) { + u.x = -(T(0.5) * numbers::pi + theta) * r / PiOver4; + } else { + u.x = (T(0.5) * numbers::pi - theta) * r / PiOver4; + } + } + + return (u + hlsl::promote >(1.0)) * T(0.5); +} + } // namespace sampling } // namespace hlsl } // namespace nbl diff --git a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl index c65a688eb3..ed6c574284 100644 --- a/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl +++ b/include/nbl/builtin/hlsl/sampling/cos_weighted_spheres.hlsl @@ -31,18 +31,43 @@ struct ProjectedHemisphere using sample_type = codomain_and_rcpPdf; using inverse_sample_type = domain_and_rcpPdf; - static vector_t3 generate(const vector_t2 _sample) + static vector_t3 __generate(const vector_t2 _sample) { vector_t2 p = concentricMapping(_sample * T(0.99999) + T(0.000005)); T z = hlsl::sqrt(hlsl::max(T(0.0), T(1.0) - p.x * p.x - p.y * p.y)); return vector_t3(p.x, p.y, z); } - static T pdf(const T L_z) + vector_t3 generate(const vector_t2 _sample) + { + return __generate(_sample); + } + + static vector_t2 __generateInverse(const vector_t3 L) + { + return invertConcentricMapping(L.xy); + } + + vector_t2 generateInverse(const vector_t3 L) + { + return __generateInverse(L); + } + + static T __pdf(const T L_z) { return L_z * numbers::inv_pi; } + scalar_type forwardPdf(const vector_t2 _sample) + { + return __pdf(__generate(_sample).z); + } + + scalar_type backwardPdf(const vector_t3 L) + { + return __pdf(L.z); + } + template > static ::nbl::hlsl::sampling::quotient_and_pdf quotient_and_pdf(const T L) { @@ -71,9 +96,9 @@ struct 
ProjectedSphere
     using sample_type = codomain_and_rcpPdf;
     using inverse_sample_type = domain_and_rcpPdf;
 
-    static vector_t3 generate(NBL_REF_ARG(vector_t3) _sample)
+    static vector_t3 __generate(NBL_REF_ARG(vector_t3) _sample)
     {
-        vector_t3 retval = hemisphere_t::generate(_sample.xy);
+        vector_t3 retval = hemisphere_t::__generate(_sample.xy);
         const bool chooseLower = _sample.z > T(0.5);
         retval.z = chooseLower ? (-retval.z) : retval.z;
         if (chooseLower)
@@ -82,9 +107,47 @@ struct ProjectedSphere
         return retval;
     }
 
-    static T pdf(T L_z)
+    // Non-static entry point; forwards `_sample` by reference to the static __generate.
+    vector_t3 generate(NBL_REF_ARG(vector_t3) _sample)
+    {
+        return __generate(_sample);
+    }
+
+    // Approximate inverse of __generate: xy comes from the hemisphere inverse, z only encodes which half L lies in.
+    static vector_t3 __generateInverse(const vector_t3 L)
+    {
+        // TODO: incomplete information to get z component, we only know the mapping of (u.z > 0.5 <-> L.z -ve) and (u.z <= 0.5 <-> L.z +ve)
+        // so set to 1 or 0 for now, consistent with the `chooseLower = _sample.z > T(0.5)` test in __generate
+        const bool isLower = L.z < T(0.0);
+        // hemisphere_t::__generateInverse is declared with a vector_t3 parameter and reads .xy itself, so pass L whole
+        return vector_t3(hemisphere_t::__generateInverse(L), isLower ? T(1.0) : T(0.0));
+    }
+
+    // Non-static entry point; forwards to the static __generateInverse.
+    vector_t3 generateInverse(const vector_t3 L)
+    {
+        return __generateInverse(L);
+    }
+
+    // Half of the hemisphere __pdf evaluated at L_z.
+    // NOTE(review): the hemisphere __pdf is L_z * inv_pi, so this is negative for L_z < 0 - confirm whether callers are expected to pass |L.z|
+    static T __pdf(T L_z)
+    {
+        return T(0.5) * hemisphere_t::__pdf(L_z);
+    }
+
+    // __pdf at the z of the direction that __generate yields for `_sample`.
+    scalar_type forwardPdf(const vector_t3 _sample)
+    {
+        // __generate takes its argument by reference (NBL_REF_ARG), so hand it a mutable local copy
+        vector_t3 u = _sample;
+        return __pdf(__generate(u).z);
+    }
+
+    // __pdf at L.z.
+    scalar_type backwardPdf(const vector_t3 L)
     {
-        return T(0.5) * hemisphere_t::pdf(L_z);
+        return __pdf(L.z);
     }
 
     template >
From c4e63b3b4a7f898b0895703429528914187169a6 Mon Sep 17 00:00:00 2001
From: keptsecret
Date: Wed, 4 Mar 2026 16:50:49 +0700
Subject: [PATCH 54/54] box muller transform add forward pdf, generate wasn't merged from pt branch

---
 .../builtin/hlsl/sampling/box_muller_transform.hlsl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
index 9f76f06576..01d6143de5 100644
--- a/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
+++ b/include/nbl/builtin/hlsl/sampling/box_muller_transform.hlsl
@@ -28,6 +28,18 @@ struct
BoxMullerTransform
     using density_type = scalar_type;
     using sample_type = codomain_and_rcpPdf;
 
+    vector2_type generate(const vector2_type u) // maps u to (cosPhi, sinPhi) * sqrt(-2 * log(u.x)) * stddev, with Phi = 2 * pi * u.y - pi
+    {
+        scalar_type sinPhi, cosPhi;
+        math::sincos(2.0 * numbers::pi * u.y - numbers::pi, sinPhi, cosPhi); // u.y selects the angle
+        return vector2_type(cosPhi, sinPhi) * nbl::hlsl::sqrt(-2.0 * nbl::hlsl::log(u.x)) * stddev; // u.x selects the radius; presumably callers keep u.x > 0 for the log - TODO confirm
+    }
+
+    vector2_type forwardPdf(const vector2_type u) // backwardPdf evaluated at generate(u)
+    {
+        return backwardPdf(generate(u));
+    }
+
     vector2_type backwardPdf(const vector2_type outPos)
     {
         const vector2_type outPos2 = outPos * outPos;