Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
9597f8a
Bump tar from 7.5.9 to 7.5.11 in /js/node (#27616)
dependabot[bot] Mar 11, 2026
c07ece3
[EP Plugin Adapter] support LoggingManager::HasDefaultLogger() (#27587)
fs-eire Mar 11, 2026
0edb66b
Remove some dead YML code (#27530)
eserscor Mar 11, 2026
b8f5f1a
Add KernelInfo string-array attribute APIs to the ORT C and C++ APIs …
tianleiwu Mar 11, 2026
e91a5c3
Extend DQ→MatMulNBits fusion to support 2/8-bit weights and Cast(fp16…
jambayk Mar 11, 2026
250e102
Update .NET action version (#27618)
eserscor Mar 12, 2026
056bab3
Add validation of position_ids in RotaryEmbedding operators (#27597)
tianleiwu Mar 12, 2026
88841d3
chore: tooling shouldn't ignore forge hidden folders (#27580)
sanaa-hamel-microsoft Mar 12, 2026
555cf06
[CUDA] Add Validation of batch_indices in RoiAlign (#27603)
tianleiwu Mar 12, 2026
dd58b54
Expose Python `OrtDeviceVendorId` enum and use it for vendor-aware `O…
tianleiwu Mar 12, 2026
60ce0e6
Use `_tpause` instead of `__builtin_ia32_tpause` (#27607)
mocknen Mar 12, 2026
32511df
Fix Potential Integer Truncation Leading to Heap Out-of-Bounds Read/W…
chilo-ms Mar 12, 2026
69feb84
Add PCI bus fallback for Linux GPU device discovery in containerized …
Copilot Mar 12, 2026
2b8176c
Fix WebGPU EP crash on exit (#27569)
fs-eire Mar 12, 2026
201e240
[Cuda Plugin] Refactor CUDA ops — Move shared CPU/CUDA helper code fr…
tianleiwu Mar 12, 2026
5274c19
MLAS/POWER10: Optimize Sgemm PackA kernel using VSX intrinsics and as…
BODAPATIMAHESH Mar 12, 2026
b5f869b
Update to C++20 (#27178)
edgchen1 Mar 12, 2026
2e151ae
Use SafeInt in implementation of Col2Im (#27625)
fs-eire Mar 13, 2026
2927257
Add Qwen3 model type support to Python transformer optimizer (#27556)
Rishi-Dave Mar 13, 2026
d8c1826
[CUDA] Support volumetric (3-D) grid sampling in the CUDA GridSample …
hariharans29 Mar 13, 2026
99e0119
Optimize ONNX Attention KV cache with ConcatNewToPast and add release…
titaiwangms Mar 13, 2026
94661a4
Accept legacy WebGPU/WebNN memory info names for backward compatibili…
fs-eire Mar 13, 2026
d6dcef1
[js/rn] Add 16KB page size alignment for Android (#27523)
dodokw Mar 13, 2026
e5f5a6a
[TensorRT RTX] Fix compiler warnings on Linux and Windows (#27602)
gedoensmax Mar 13, 2026
ed3f8bf
Fix build errors of GridSample and test failures in test_attention_fu…
tianleiwu Mar 13, 2026
6c959f7
[Cuda Plugin] Refactor CUDA ops — Move more shared CPU/CUDA helper co…
tianleiwu Mar 13, 2026
e635e2d
Add PE version info to onnxruntime_providers_vitisai.dll (#27626)
xhan65 Mar 13, 2026
b280801
Skip building custom_op_library if CUDA_MINIMAL is enabled (#27520)
chilo-ms Mar 14, 2026
09b5695
Fix DQ→MatMulNBits fusion for FP16 models on CPU EP (#27640)
jambayk Mar 14, 2026
de92668
Revert "[QNN EP] Fix error messages being logged as VERBOSE instead o…
derdeljan-msft Mar 14, 2026
50f3cc3
Cleanup external data path validation (#27539)
adrianlizarraga Mar 14, 2026
d45686d
Add C# and Python language bindings for GetCompatibilityInfoFromModel…
adrastogi Mar 14, 2026
abe0b62
[VitisAI] Fix unused warnings (#27630)
amd-genmingz Mar 14, 2026
227c3d5
Add RotaryEmbedding fusion for Qwen3 on-the-fly RoPE patterns (#27590)
Rishi-Dave Mar 16, 2026
0f66526
[Plugin EP] Check for nullptr before dereferencing (#27644)
apwojcik Mar 16, 2026
929f73e
Plugin EP: Fix bug that incorrectly assigned duplicate MetDef IDs to …
adrianlizarraga Mar 16, 2026
4c3ab77
[web] Fix 2 bugs in emdawnwebgpu (patches dawn) (#27641)
fs-eire Mar 16, 2026
98a5495
Fix NvTensorRTRTXExecutionProvider capability claims (#27631)
Copilot Mar 16, 2026
535161a
[Build] Fix debug build (#27659)
tianleiwu Mar 16, 2026
68d2ab8
Merge remote-tracking branch 'origin/master' into sync_msft_18032026
Jaswanth51 Mar 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion .github/workflows/windows_x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,51 @@ jobs:
working-directory: ${{ github.workspace }}

- name: Use .NET 8.x
uses: actions/setup-dotnet@v3
uses: actions/setup-dotnet@v5
with:
dotnet-version: '8.x'
env:
PROCESSOR_ARCHITECTURE: x86 # x86 .NET

- name: Prefer x86 dotnet on PATH
  shell: pwsh
  run: |
    # Purpose: make the 32-bit dotnet host win PATH lookup, both inside this step
    # and in the steps that follow, and fail fast if that ordering cannot be confirmed.
    $x86DotnetDir = 'C:\Program Files (x86)\dotnet'
    $x64DotnetDir = 'C:\Program Files\dotnet'
    $machinePath = [Environment]::GetEnvironmentVariable('Path', 'Machine')
    Write-Host "Machine PATH: $machinePath"

    # Returns the position of the first entry equal to $Value, or -1 when absent.
    # Uses -eq so the comparison is case-insensitive, which matches how Windows treats paths.
    function Find-FirstIndex([string[]]$Items, [string]$Value) {
      for ($i = 0; $i -lt $Items.Count; $i++) {
        if ($Items[$i] -eq $Value) { return $i }
      }
      return -1
    }

    $pathEntries = @($env:PATH -split ';' | Where-Object { $_ })

    # Compare without trailing backslashes so 'dir' and 'dir\' count as the same entry.
    $normalizedX86DotnetDir = $x86DotnetDir.TrimEnd('\')
    $normalizedX64DotnetDir = $x64DotnetDir.TrimEnd('\')
    $normalizedPathEntries = @($pathEntries | ForEach-Object { $_.TrimEnd('\') })

    # Changes to $env:PATH below only last for this step; GITHUB_PATH is what carries over.
    # Persist the x86 directory unless the inherited PATH already lists it ahead of the x64 one.
    # (Checking only for presence would leave later steps resolving x64 first whenever x86 is
    # on PATH but listed after x64.)
    $inheritedX86Index = Find-FirstIndex -Items $normalizedPathEntries -Value $normalizedX86DotnetDir
    $inheritedX64Index = Find-FirstIndex -Items $normalizedPathEntries -Value $normalizedX64DotnetDir
    $x86AlreadyPreferred = ($inheritedX86Index -ge 0) -and (($inheritedX64Index -lt 0) -or ($inheritedX86Index -lt $inheritedX64Index))
    if (-not $x86AlreadyPreferred) {
      Add-Content -Path $env:GITHUB_PATH -Value $x86DotnetDir
    }

    # Move the x86 directory to the front for the remainder of this step.
    $reorderedPathEntries = @(
      $x86DotnetDir
      $pathEntries | Where-Object { $_.TrimEnd('\') -ne $normalizedX86DotnetDir }
    )
    $env:PATH = ($reorderedPathEntries -join ';')

    $dotnetPaths = @(Get-Command dotnet -All | Select-Object -ExpandProperty Source -Unique)
    Write-Host 'Resolved dotnet executables:'
    $dotnetPaths | ForEach-Object { Write-Host $_ }

    $x86DotnetExe = "$x86DotnetDir\dotnet.exe"
    $x64DotnetExe = "$x64DotnetDir\dotnet.exe"
    $x86Index = Find-FirstIndex -Items $dotnetPaths -Value $x86DotnetExe
    $x64Index = Find-FirstIndex -Items $dotnetPaths -Value $x64DotnetExe

    # The x86 host has to exist at all ...
    if ($x86Index -lt 0) {
      throw "Expected x86 dotnet executable was not found: $x86DotnetExe"
    }

    # ... and must be resolved before the x64 host when both are present.
    if ($x64Index -ge 0 -and $x86Index -gt $x64Index) {
      throw "x86 dotnet must appear before x64 dotnet on PATH. Found $x86DotnetExe after $x64DotnetExe."
    }

- name: Use Nuget 6.x
uses: nuget/setup-nuget@v2
with:
Expand Down
4 changes: 4 additions & 0 deletions .ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Negated, root-anchored patterns: keep these hidden top-level folders visible to
# tooling that reads this file (presumably search/indexing tools that skip dot-folders
# by default — confirm which tools the repository relies on).
!/.config
!/.devcontainer
!/.github
!/.pipelines
355 changes: 0 additions & 355 deletions .pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml

This file was deleted.

6 changes: 0 additions & 6 deletions .pipelines/nuget_config/x64/packages.config

This file was deleted.

6 changes: 0 additions & 6 deletions .pipelines/nuget_config/x86/packages.config

This file was deleted.

180 changes: 0 additions & 180 deletions .pipelines/windowsai-steps.yml

This file was deleted.

34 changes: 19 additions & 15 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,24 @@ cmake_policy(SET CMP0104 OLD)
# Project
project(onnxruntime C CXX ASM)

# Default language standards. A value already present in CMAKE_C_STANDARD or
# CMAKE_CXX_STANDARD (for example from the command line) is left untouched.
if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 20)
endif()

if(NOT CMAKE_C_STANDARD)
  # C99 is needed for Java.
  set(CMAKE_C_STANDARD 99)
endif()

# C++20 module scanning stays off for now. Known blockers:
#   - Android builds run from Linux Docker containers have trouble finding clang-scan-deps.
#   - Some CUDA EP code that uses CUTLASS needs the MSVC /permissive option, and that
#     option is not compatible with C++20 modules.
set(CMAKE_CXX_SCAN_FOR_MODULES OFF)

# Disable fast-math for Intel oneAPI compiler
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC-like")
Expand All @@ -21,11 +39,6 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
endif()
endif()

# Needed for Java
if (NOT CMAKE_CXX_STANDARD)
set(CMAKE_C_STANDARD 99)
endif()

include(CheckCXXCompilerFlag)
include(CheckLanguage)
include(CMakeDependentOption)
Expand All @@ -34,15 +47,6 @@ include(CheckFunctionExists)
include(CheckSymbolExists)
include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables

if (NOT CMAKE_CXX_STANDARD)
# TODO: update this once all systems adopt C++20
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(CMAKE_CXX_STANDARD 20)
else()
set(CMAKE_CXX_STANDARD 17)
endif()
endif()

if (MSVC)
# Make sure Visual Studio sets __cplusplus macro correctly: https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus")
Expand Down Expand Up @@ -1437,7 +1441,7 @@ configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_c
get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

if (onnxruntime_USE_CUDA)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
if(onnxruntime_CUDA_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDA_HOME} CUDAToolkit_ROOT)
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/cutlass.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ onnxruntime_fetchcontent_declare(
URL ${DEP_URL_cutlass}
URL_HASH SHA1=${DEP_SHA1_cutlass}
EXCLUDE_FROM_ALL
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch
)

FetchContent_GetProperties(cutlass)
Expand Down
22 changes: 22 additions & 0 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,16 @@ if (onnxruntime_USE_WEBGPU)
#
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/safari_polyfill.patch &&

# The dawn_device_lost_keepalive.patch contains the following changes:
#
# - (private) Fix premature ABORT when device.lost fires in callUserCallback
# The device.lost handler was wrapped in callUserCallback without runtimeKeepalivePush/Pop,
# causing maybeExit() to trigger _exit(0) and set ABORT=true when runtimeKeepaliveCounter
# was 0. This silently dropped all subsequent WebGPU callbacks (e.g. requestAdapter),
# breaking session re-creation after device destruction.
#
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_device_lost_keepalive.patch &&

# The dawn_dxc_output_dir.patch contains the following changes:
#
# - (private) Fix DXC output directory for RelWithDebInfo and MinSizeRel configs
Expand All @@ -762,6 +772,18 @@ if (onnxruntime_USE_WEBGPU)
#
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_dxc_output_dir.patch &&

# The dawn_buffer_fix_injection.patch contains the following changes:
#
# - (private) Fix importJsBuffer calling wrong WGPUBufferImpl constructor
# Without this patch, importJsBuffer calls emwgpuCreateBuffer which invokes the
# (source, mappedAtCreation=false) constructor instead of the injection constructor
# tagged with kImportedFromJS. This patch adjusts the injection constructor signature
# to disambiguate it from the (source, mappedAtCreation) overload so emwgpuCreateBuffer
# reliably selects the injection constructor and imported buffers are properly tagged
# as kImportedFromJS.
#
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn_buffer_fix_injection.patch &&

# Remove the test folder to speed up potential file scan operations (70k+ files not needed for build).
# Using <SOURCE_DIR> token ensures the correct absolute path regardless of working directory.
${CMAKE_COMMAND} -E rm -rf <SOURCE_DIR>/test)
Expand Down
3 changes: 2 additions & 1 deletion cmake/onnxruntime_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ if (WIN32)
"${ONNXRUNTIME_ROOT}/core/platform/windows/device_discovery.cc")
elseif (LINUX)
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc")
"${ONNXRUNTIME_ROOT}/core/platform/linux/device_discovery.cc"
"${ONNXRUNTIME_ROOT}/core/platform/linux/pci_device_discovery.h")
elseif (APPLE)
list(APPEND onnxruntime_common_src_patterns
"${ONNXRUNTIME_ROOT}/core/platform/apple/device_discovery.cc")
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#cmakedefine HAS_PARENTHESES
#cmakedefine HAS_REALLOCARRAY
#cmakedefine HAS_SHORTEN_64_TO_32
#cmakedefine HAS_STRINGOP_OVERFLOW
#cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE
#cmakedefine HAS_UNUSED_BUT_SET_PARAMETER
#cmakedefine HAS_UNUSED_BUT_SET_VARIABLE
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_fuzz_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ if (onnxruntime_FUZZ_ENABLED)
# compile the executables
onnxruntime_add_executable(onnxruntime_security_fuzz ${SEC_FUZ_SRC})

# compile with c++17
# compile with at least c++17
target_compile_features(onnxruntime_security_fuzz PUBLIC cxx_std_17)

# Security fuzzing engine header file reference
Expand Down
Loading
Loading