From a1476d8c695eeb34f8c3a1282ac0db92b0347209 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 17 May 2026 19:00:25 +0100 Subject: [PATCH 01/17] [NFC][X86] shouldUseHorizontalOp - constify SelectionDAG argument (#198185) Noticed while working on cleaning up VECREDUCE_ADD handling --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3dc9f503088e6..c91143fefb6af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20961,7 +20961,7 @@ std::pair X86TargetLowering::BuildFILD( /// Horizontal vector math instructions may be slower than normal math with /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch /// implementation, and likely shuffle complexity of the alternate sequence. -static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, +static bool shouldUseHorizontalOp(bool IsSingleSource, const SelectionDAG &DAG, const X86Subtarget &Subtarget) { bool IsOptimizingSize = DAG.shouldOptForSize(); bool HasFastHOps = Subtarget.hasFastHorizontalOps(); From f94a6901f82dcb36de7c889525ba8223198712fd Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 17 May 2026 11:41:27 -0700 Subject: [PATCH 02/17] [Passes] Use consistent naming for printer passes (#198144) --- llvm/docs/AliasAnalysis.rst | 6 +++--- llvm/docs/Passes.rst | 8 ++++---- llvm/include/llvm/LinkAllPasses.h | 1 - llvm/lib/Passes/PassRegistry.def | 11 +++++------ llvm/test/Analysis/AliasSet/argmemonly.ll | 2 +- llvm/test/Analysis/AliasSet/guards.ll | 2 +- llvm/test/Analysis/AliasSet/intrinsics.ll | 2 +- llvm/test/Analysis/AliasSet/memloc-vscale.ll | 2 +- llvm/test/Analysis/AliasSet/memset.ll | 2 +- llvm/test/Analysis/AliasSet/memtransfer.ll | 2 +- llvm/test/Analysis/AliasSet/saturation.ll | 4 ++-- .../Analysis/BasicAA/separate_storage-alias-sets.ll | 2 +- llvm/test/Analysis/MustExecute/const-cond.ll | 2 +- llvm/test/Analysis/MustExecute/infinite_loops.ll | 2 +- llvm/test/Analysis/MustExecute/irreducible-cfg.ll | 2 +- llvm/test/Analysis/MustExecute/loop-header.ll | 2 +- .../Analysis/MustExecute/must_be_executed_context.ll | 2 +- llvm/test/Analysis/MustExecute/pr57780.ll | 2 +- llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll | 2 +- .../Analysis/ValueTracking/memory-dereferenceable.ll | 4 ++-- llvm/test/Other/debugcounter-predicateinfo.ll | 2 +- llvm/test/Other/print-cfg-scc.ll | 2 +- .../Transforms/FunctionSpecialization/ssa-copy.ll | 2 +- llvm/test/Transforms/LoopVectorize/pr25281.ll | 2 +- .../Util/PredicateInfo/assume-operand-bundles.ll | 2 +- .../Util/PredicateInfo/branch-on-same-cond.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/condprop.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/diamond.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/edge.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/ordering.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/pr33456.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/pr33457.ll | 2 +- llvm/test/Transforms/Util/PredicateInfo/testandor.ll | 2 +- .../Transforms/Util/PredicateInfo/unnamed-types.ll | 2 +- .../test/Transforms/Util/PredicateInfo/unreachable.ll | 2 +- 35 files changed, 45 insertions(+), 47 deletions(-) diff --git a/llvm/docs/AliasAnalysis.rst b/llvm/docs/AliasAnalysis.rst index af6da8cf64ea3..9b05c9d7be138 100644 --- a/llvm/docs/AliasAnalysis.rst +++ b/llvm/docs/AliasAnalysis.rst @@ -638,16 +638,16 @@ implementations. You can use them with commands like: % opt -ds-aa -aa-eval foo.bc -disable-output -stats -The ``-print-alias-sets`` pass +The ``print`` pass ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``-print-alias-sets`` pass is exposed as part of the ``opt`` tool to print +The ``print`` pass is exposed as part of the ``opt`` tool to print out the Alias Sets formed by the `AliasSetTracker`_ class. This is useful if you're using the ``AliasSetTracker`` class. To use it, use something like: .. code-block:: bash - % opt -ds-aa -print-alias-sets -disable-output + % opt -passes='print' -disable-output The ``-aa-eval`` pass ^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst index 882ec2a71577e..66a682983d9d5 100644 --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -220,8 +220,8 @@ This pass decodes the debug info metadata in a module and prints it to standard This pass is a simple post-dominator construction algorithm for finding post-dominators. -``print-alias-sets``: Alias Set Printer ---------------------------------------- +``print``: Alias Set Printer +---------------------------------------- Yet to be written. @@ -237,8 +237,8 @@ in a human-readable form. This pass, only available in ``opt``, prints the SCCs of the call graph to standard error in a human-readable form. -``print-cfg-sccs``: Print SCCs of each function CFG ---------------------------------------------------- +``print``: Print SCCs of each function CFG +---------------------------------------------------- This pass, only available in ``opt``, prints the SCCs of each function CFG to standard error in a human-readable form. diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a01a67f136710..5182341fa7a89 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -155,7 +155,6 @@ struct ForcePassLinking { llvm::AliasAnalysis AA(TLI); llvm::BatchAAResults BAA(AA); llvm::AliasSetTracker X(BAA); - X.add(llvm::MemoryLocation()); // for -print-alias-sets (void)llvm::AreStatisticsEnabled(); (void)llvm::sys::RunningOnValgrind(); } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 3328bc0fe836f..0b5a6d8223102 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -507,12 +507,11 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("place-safepoints", PlaceSafepointsPass()) FUNCTION_PASS("print", PrintFunctionPass(errs())) -// TODO: rename to print after NPM switch -FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(errs())) -FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs())) -FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs())) -FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs())) -FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs())) +FUNCTION_PASS("print", AliasSetsPrinterPass(errs())) +FUNCTION_PASS("print", CFGSCCPrinterPass(errs())) +FUNCTION_PASS("print", MemDerefPrinterPass(errs())) +FUNCTION_PASS("print", MustExecutePrinterPass(errs())) +FUNCTION_PASS("print", PredicateInfoPrinterPass(errs())) FUNCTION_PASS("print", AssumptionPrinterPass(errs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(errs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(errs())) diff --git a/llvm/test/Analysis/AliasSet/argmemonly.ll b/llvm/test/Analysis/AliasSet/argmemonly.ll index 995fb26ca436a..6912447491149 100644 --- a/llvm/test/Analysis/AliasSet/argmemonly.ll +++ b/llvm/test/Analysis/AliasSet/argmemonly.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/guards.ll b/llvm/test/Analysis/AliasSet/guards.ll index 9ca70f6244800..32f74a41773d6 100644 --- a/llvm/test/Analysis/AliasSet/guards.ll +++ b/llvm/test/Analysis/AliasSet/guards.ll @@ -1,4 +1,4 @@ -; RUN: opt -aa-pipeline=basic-aa -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='print' -S -o - < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) ; CHECK: Alias sets for function 'test0': diff --git a/llvm/test/Analysis/AliasSet/intrinsics.ll b/llvm/test/Analysis/AliasSet/intrinsics.ll index 0dc802ca7e0aa..a0a319ae3e683 100644 --- a/llvm/test/Analysis/AliasSet/intrinsics.ll +++ b/llvm/test/Analysis/AliasSet/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions" +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions" ; CHECK: Alias sets for function 'test1': ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values. diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll index 6b41604637405..acd183818903b 100644 --- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll +++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes=print-alias-sets 2>&1 | FileCheck %s +; RUN: opt -S < %s -passes='print' 2>&1 | FileCheck %s ; CHECK-LABEL: Alias sets for function 'sn' ; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Memory locations: (ptr %p, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8)) diff --git a/llvm/test/Analysis/AliasSet/memset.ll b/llvm/test/Analysis/AliasSet/memset.ll index 17f1e53f77e32..01242fa0171b3 100644 --- a/llvm/test/Analysis/AliasSet/memset.ll +++ b/llvm/test/Analysis/AliasSet/memset.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/memtransfer.ll b/llvm/test/Analysis/AliasSet/memtransfer.ll index 93290c39620eb..b91a14099f68c 100644 --- a/llvm/test/Analysis/AliasSet/memtransfer.ll +++ b/llvm/test/Analysis/AliasSet/memtransfer.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/saturation.ll b/llvm/test/Analysis/AliasSet/saturation.ll index 27f5bbee2f55c..1d547dcd8f26a 100644 --- a/llvm/test/Analysis/AliasSet/saturation.ll +++ b/llvm/test/Analysis/AliasSet/saturation.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=print-alias-sets -alias-set-saturation-threshold=4 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOSAT -; RUN: opt -passes=print-alias-sets -alias-set-saturation-threshold=3 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT +; RUN: opt -passes='print' -alias-set-saturation-threshold=4 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOSAT +; RUN: opt -passes='print' -alias-set-saturation-threshold=3 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT ; CHECK-LABEL: 'nomerge' ; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(4)) diff --git a/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll b/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll index 37d8e55cb2ff3..ad213c79697ad 100644 --- a/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll +++ b/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll @@ -2,7 +2,7 @@ ; separate storage hints. This lets alias analysis users (such as the alias set ; tracker) who can't be context-sensitive still get the benefits of hints. -; RUN: opt < %s -basic-aa-separate-storage -S -passes=print-alias-sets 2>&1 | FileCheck %s +; RUN: opt < %s -basic-aa-separate-storage -S -passes='print' 2>&1 | FileCheck %s declare void @llvm.assume(i1) diff --git a/llvm/test/Analysis/MustExecute/const-cond.ll b/llvm/test/Analysis/MustExecute/const-cond.ll index 97358f670b88c..01553b2ca857d 100644 --- a/llvm/test/Analysis/MustExecute/const-cond.ll +++ b/llvm/test/Analysis/MustExecute/const-cond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; In general the CFG below is easily simplified but this is useful for ; pass ordering issue elimination. diff --git a/llvm/test/Analysis/MustExecute/infinite_loops.ll b/llvm/test/Analysis/MustExecute/infinite_loops.ll index 3fac1293df5d4..844dd944b5d38 100644 --- a/llvm/test/Analysis/MustExecute/infinite_loops.ll +++ b/llvm/test/Analysis/MustExecute/infinite_loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; Infinite loop. ; Make sure that the backedge is mustexec. diff --git a/llvm/test/Analysis/MustExecute/irreducible-cfg.ll b/llvm/test/Analysis/MustExecute/irreducible-cfg.ll index a452761ab3356..5db862995cecd 100644 --- a/llvm/test/Analysis/MustExecute/irreducible-cfg.ll +++ b/llvm/test/Analysis/MustExecute/irreducible-cfg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; The loop body has two predecessors, %header and %side-entry. This leads to irreducible-cfg define i64 @baz() { diff --git a/llvm/test/Analysis/MustExecute/loop-header.ll b/llvm/test/Analysis/MustExecute/loop-header.ll index 50efd74b61b11..d632323f94978 100644 --- a/llvm/test/Analysis/MustExecute/loop-header.ll +++ b/llvm/test/Analysis/MustExecute/loop-header.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s define i1 @header_with_icf(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @header_with_icf( diff --git a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll index f3360f7cd0753..d57a54a84eb6e 100644 --- a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll +++ b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=print-mustexecute -disable-output 2>&1 | FileCheck %s --check-prefix=ME +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s --check-prefix=ME ; RUN: opt < %s -passes=print-must-be-executed-contexts -disable-output 2>&1 | FileCheck %s --check-prefix=MBEC ; ; void simple_conditional(int c) { diff --git a/llvm/test/Analysis/MustExecute/pr57780.ll b/llvm/test/Analysis/MustExecute/pr57780.ll index 4b26cadbb42c5..2f6a7a2831f8e 100644 --- a/llvm/test/Analysis/MustExecute/pr57780.ll +++ b/llvm/test/Analysis/MustExecute/pr57780.ll @@ -1,4 +1,4 @@ -; RUN: opt -disable-output -passes=print-mustexecute < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' < %s 2>&1 | FileCheck %s @c = global i16 0, align 2 diff --git a/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll b/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll index 4b63c036f5491..47b8cc95388b4 100644 --- a/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll +++ b/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK target datalayout = "e-i32:32:64" diff --git a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll index 8c5216e0c45d9..fc40c68624f2d 100644 --- a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll +++ b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics=false 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK,POINT +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics=false 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK,POINT ; Uses the print-deref (+ analyze to print) pass to run diff --git a/llvm/test/Other/debugcounter-predicateinfo.ll b/llvm/test/Other/debugcounter-predicateinfo.ll index d91b0f8904cb0..57929220c6a28 100644 --- a/llvm/test/Other/debugcounter-predicateinfo.ll +++ b/llvm/test/Other/debugcounter-predicateinfo.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -debug-counter=predicateinfo-rename=1 -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -debug-counter=predicateinfo-rename=1 -passes='print' < %s 2>&1 | FileCheck %s ;; Test that, with debug counters on, we don't rename the first info, only the second define fastcc void @barney() { ; CHECK-LABEL: @barney( diff --git a/llvm/test/Other/print-cfg-scc.ll b/llvm/test/Other/print-cfg-scc.ll index 19ac00e3d7641..31ccd614a1918 100644 --- a/llvm/test/Other/print-cfg-scc.ll +++ b/llvm/test/Other/print-cfg-scc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=print-cfg-sccs -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s ; CHECK: SCC #1: %UnifiedExitNode ; CHECK: SCC #2: %loopexit.5, %loopexit.6, %loopentry.7, %loopentry.6, %loopentry.5, %endif.2 diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll index 11696cb76c8bb..b7cee46aa6f32 100644 --- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll +++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes=print-predicateinfo -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF +; RUN: opt -passes='print' -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ ; RUN: -funcspec-for-literal-constant=true \ ; RUN: -funcspec-min-codesize-savings=50 \ diff --git a/llvm/test/Transforms/LoopVectorize/pr25281.ll b/llvm/test/Transforms/LoopVectorize/pr25281.ll index 06031d7e925af..4b8369936123c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr25281.ll +++ b/llvm/test/Transforms/LoopVectorize/pr25281.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -aa-pipeline=scev-aa -passes=loop-vectorize,print-alias-sets -S -o - 2>&1 | FileCheck %s +; RUN: opt < %s -aa-pipeline=scev-aa -passes='loop-vectorize,print' -S -o - 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; PR25281 diff --git a/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll b/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll index ab23d3354da66..646e5383fade4 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -S -passes=print-predicateinfo %s 2>&1 >/dev/null | FileCheck %s +; RUN: opt -S -passes='print' %s 2>&1 >/dev/null | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll index f024106b7299a..8feec397ac545 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -S -passes=print-predicateinfo < %s 2>&1 >/dev/null | FileCheck %s +; RUN: opt -S -passes='print' < %s 2>&1 >/dev/null | FileCheck %s ; FIXME: RenamedOp should be %cmp or %x in all cases here, ; which is the value used in the condition. diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index 42e8ccb760b3f..e9e36c7a2fdc6 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s @a = external global i32 ; [#uses=7] diff --git a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll index 06c02d699c511..4e4ee0f2b09bd 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s define i1 @f(i32 %x, i1 %y) { ; CHECK-LABEL: @f( ; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] diff --git a/llvm/test/Transforms/Util/PredicateInfo/edge.ll b/llvm/test/Transforms/Util/PredicateInfo/edge.ll index 913832696215e..de18a7ad4474f 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/edge.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/edge.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s define i32 @f1(i32 %x) { ; CHECK-LABEL: @f1( diff --git a/llvm/test/Transforms/Util/PredicateInfo/ordering.ll b/llvm/test/Transforms/Util/PredicateInfo/ordering.ll index d7ce9dc652c8b..3a1cdf1ee955d 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/ordering.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/ordering.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=print-predicateinfo -debug < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -debug < %s 2>&1 | FileCheck %s declare void @use(i32) declare void @use.i1(i1) diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll index 4762d376ef5aa..3f26f437c860a 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. @a = global i32 1, align 4 @d = common global i32 0, align 4 diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll index e4fd4cc6dd8a2..138297ccdb4b5 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. @a = global i32 6, align 4 @c = global i32 -1, align 4 diff --git a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll index d29aadd54128c..bee4fcd292427 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s declare void @foo(i1) declare void @bar(i32) diff --git a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll index faf4bec61c935..0f1685f942fa1 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll @@ -1,4 +1,4 @@ -; RUN: opt -disable-output -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' < %s 2>&1 | FileCheck %s %1 = type opaque %0 = type opaque diff --git a/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll b/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll index 99314d45913e0..05536b0962906 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' < %s 2>&1 | FileCheck %s declare void @foo() declare void @llvm.assume(i1) From 47e142b5ee3d1a428c91dc25a281972e19b57a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sun, 17 May 2026 20:47:33 +0200 Subject: [PATCH 03/17] [AArch64] Fix handling of x29/x30 in inline assembly clobbers (#167783) The AArch64 backend was silently ignoring inline assembly clobbers when numeric register names (x29, x30) were used instead of their architectural aliases (fp, lr). I found this bug via inline assembly in Zig, which not normalize the register names the way clang does. There is an incoplete workaround for this in Rust, but that only handles `x30/lr`, not `x29/fp`. I thought it would make sense to fix this properly rather than adding a workaround to Zig. This patch adds explicit handling in getRegForInlineAsmConstraint() to map both numeric and alias forms to the correct physical registers, following the same pattern used by the RISC-V backend. I've left `x31/sp` without changes, it would nice to have to have warning when trying to clobber `x31`, just like there is for `sp`, but that register needs different handling, so it's best done separately. If you have code like this: define void @clobber_x30() nounwind { tail call void asm sideeffect "nop", "~{x30}"() ret void } Here is the generated assembly before: clobber_x30: // @clobber_x30 //APP nop //NO_APP ret And after: clobber_x30: // @clobber_x30 str x30, [sp, #-16]! // 8-byte Folded Spill //APP nop //NO_APP ldr x30, [sp], #16 // 8-byte Folded Reload ret --- llvm/docs/ReleaseNotes.md | 5 +++ .../Target/AArch64/AArch64ISelLowering.cpp | 15 +++++++ .../AArch64/inline-asm-clobber-x29-x30.ll | 44 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index fffd696e59baf..12b7ad458e93e 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -148,6 +148,11 @@ Makes programs 10x faster by doing Special New Thing. disabled by default to maintain compatibility with Binutils and LLVM older toolchains that do not define the `R_AARCH64_TLS_DTPREL64` static relocation type for TLS offsets. +* A bug was fixed that caused LLVM IR inline assembly clobbers of the x29 and + x30 registers to be ignored when they were written using their xN names + instead of the ABI names FP and LR. Note that LLVM IR produced by Clang + always uses the ABI names, but other frontends may not. + ([#167783](https://github.com/llvm/llvm-project/pull/167783)) ### Changes to the AMDGPU Backend diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0eac7bc11a777..3016e1b94c22d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/SmallVectorExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -13698,6 +13699,20 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); } + // Clang will correctly decode the usage of register name aliases into their + // official names. However, other frontends like `rustc` do not. The + // conversion below allows users of these frontends to use the ABI names for + // registers in LLVM-style register constraints. + // + // x31->sp is not included here because it's not a general register and + // needs different handling + unsigned XRegFromAlias = StringSwitch(Constraint.lower()) + .Cases({"{x29}", "{fp}"}, AArch64::FP) + .Cases({"{x30}", "{lr}"}, AArch64::LR) + .Default(AArch64::NoRegister); + if (XRegFromAlias != AArch64::NoRegister) + return std::make_pair(XRegFromAlias, &AArch64::GPR64RegClass); + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; diff --git a/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll b/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll new file mode 100644 index 0000000000000..cde55522fb19b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck %s + +; Test that both numeric register names (x29, x30) and their architectural +; aliases (fp, lr) work correctly as clobbers in inline assembly. + +define void @clobber_x29() nounwind { +; CHECK-LABEL: clobber_x29: +; CHECK: str x29, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x29, [sp + tail call void asm sideeffect "", "~{x29}"() + ret void +} + +define void @clobber_fp() nounwind { +; CHECK-LABEL: clobber_fp: +; CHECK: str x29, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x29, [sp + tail call void asm sideeffect "", "~{fp}"() + ret void +} + +define void @clobber_x30() nounwind { +; CHECK-LABEL: clobber_x30: +; CHECK: str x30, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x30, [sp + tail call void asm sideeffect "", "~{x30}"() + ret void +} + +define void @clobber_lr() nounwind { +; CHECK-LABEL: clobber_lr: +; CHECK: str x30, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x30, [sp + tail call void asm sideeffect "", "~{lr}"() + ret void +} From 3a2876d8cbab2efe2dd866de14edf372671a25ee Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Sun, 17 May 2026 21:01:05 +0200 Subject: [PATCH 04/17] [mlir][SPIR-V] Fix math.powf lowering for non-integer exponents (#197727) The ConvertFToS usage only works when y is an integer. Use it only for integer constants, for others: lower as GL.Exp(y * GL.Log(x)) --- .../Conversion/MathToSPIRV/MathToSPIRV.cpp | 130 ++++++++++-------- .../MathToSPIRV/math-to-gl-spirv.mlir | 99 +++++++++---- 2 files changed, 145 insertions(+), 84 deletions(-) diff --git a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp index 01285c6c0ec09..265aca75fae36 100644 --- a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp +++ b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp @@ -15,9 +15,11 @@ #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Matchers.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FormatVariadic.h" #define DEBUG_TYPE "math-to-spirv-pattern" @@ -360,75 +362,81 @@ struct PowFOpPattern final : public OpConversionPattern { if (!dstType) return failure(); - // Get the scalar float type. - FloatType scalarFloatType; - if (auto scalarType = dyn_cast(powfOp.getType())) { - scalarFloatType = scalarType; - } else if (auto vectorType = dyn_cast(powfOp.getType())) { - scalarFloatType = cast(vectorType.getElementType()); - } else { - return failure(); + Location loc = powfOp.getLoc(); + Type operandType = adaptor.getRhs().getType(); + + // Parity-based lowering requires an integer-valued constant exponent. + // Otherwise fall back to exp(y*log(x)), which yields NaN for x<0 (matches + // C). + auto isOdd = [](const APFloat &v) { + APSInt i(/*BitWidth=*/64, /*isUnsigned=*/false); + bool ignored; + v.convertToInteger(i, APFloat::rmTowardZero, &ignored); + return i[0]; + }; + + SmallVector oddMask; + Attribute rhsAttr; + if (matchPattern(adaptor.getRhs(), m_Constant(&rhsAttr))) { + TypeSwitch(rhsAttr) + .Case([&](FloatAttr a) { + if (a.getValue().isInteger()) + oddMask.push_back(isOdd(a.getValue())); + }) + .Case([&](SplatElementsAttr a) { + APFloat splat = a.getSplatValue(); + if (splat.isInteger()) + oddMask.push_back(isOdd(splat)); + }) + .Case([&](DenseElementsAttr a) { + SmallVector mask; + for (const APFloat &elt : a.getValues()) { + if (!elt.isInteger()) + return; + mask.push_back(isOdd(elt)); + } + oddMask = std::move(mask); + }); } - // Get int type of the same shape as the float type. - Type scalarIntType = rewriter.getIntegerType(32); - Type intType = scalarIntType; - auto operandType = adaptor.getRhs().getType(); - if (auto vectorType = dyn_cast(operandType)) { - auto shape = vectorType.getShape(); - intType = VectorType::get(shape, scalarIntType); + if (oddMask.empty()) { + Value log = spirv::GLLogOp::create(rewriter, loc, adaptor.getLhs()); + Value mul = spirv::FMulOp::create(rewriter, loc, adaptor.getRhs(), log); + rewriter.replaceOpWithNewOp(powfOp, mul); + return success(); + } + + // GL.Pow is undefined for x < 0; take abs and conditionally negate the + // result for lanes whose exponent is odd. + Value abs = spirv::GLFAbsOp::create(rewriter, loc, adaptor.getLhs()); + Value pow = spirv::GLPowOp::create(rewriter, loc, abs, adaptor.getRhs()); + + // No odd-parity element: result has the same sign as |lhs|^rhs >= 0. + if (llvm::none_of(oddMask, [](bool b) { return b; })) { + rewriter.replaceOp(powfOp, pow); + return success(); } - // Per GL Pow extended instruction spec: - // "Result is undefined if x < 0. Result is undefined if x = 0 and y <= 0." - Location loc = powfOp.getLoc(); Value zero = spirv::ConstantOp::getZero(operandType, loc, rewriter); Value lessThan = spirv::FOrdLessThanOp::create(rewriter, loc, adaptor.getLhs(), zero); - - // Per C/C++ spec: - // > pow(base, exponent) returns NaN (and raises FE_INVALID) if base is - // > finite and negative and exponent is finite and non-integer. - // Calculate the reminder from the exponent and check whether it is zero. - Value floatOne = spirv::ConstantOp::getOne(operandType, loc, rewriter); - Value expRem = - spirv::FRemOp::create(rewriter, loc, adaptor.getRhs(), floatOne); - Value expRemNonZero = - spirv::FOrdNotEqualOp::create(rewriter, loc, expRem, zero); - Value cmpNegativeWithFractionalExp = - spirv::LogicalAndOp::create(rewriter, loc, expRemNonZero, lessThan); - // Create NaN result and replace base value if conditions are met. - const auto &floatSemantics = scalarFloatType.getFloatSemantics(); - const auto nan = APFloat::getNaN(floatSemantics); - Attribute nanAttr = rewriter.getFloatAttr(scalarFloatType, nan); - if (auto vectorType = dyn_cast(operandType)) - nanAttr = DenseElementsAttr::get(vectorType, nan); - - Value nanValue = - spirv::ConstantOp::create(rewriter, loc, operandType, nanAttr); - Value lhs = - spirv::SelectOp::create(rewriter, loc, cmpNegativeWithFractionalExp, - nanValue, adaptor.getLhs()); - Value abs = spirv::GLFAbsOp::create(rewriter, loc, lhs); - - // TODO: The following just forcefully casts y into an integer value in - // order to properly propagate the sign, assuming integer y cases. It - // doesn't cover other cases and should be fixed. - - // Cast exponent to integer and calculate exponent % 2 != 0. - Value intRhs = - spirv::ConvertFToSOp::create(rewriter, loc, intType, adaptor.getRhs()); - Value intOne = spirv::ConstantOp::getOne(intType, loc, rewriter); - Value bitwiseAndOne = - spirv::BitwiseAndOp::create(rewriter, loc, intRhs, intOne); - Value isOdd = spirv::IEqualOp::create(rewriter, loc, bitwiseAndOne, intOne); - - // calculate pow based on abs(lhs)^rhs. - Value pow = spirv::GLPowOp::create(rewriter, loc, abs, adaptor.getRhs()); Value negate = spirv::FNegateOp::create(rewriter, loc, pow); - // if the exponent is odd and lhs < 0, negate the result. - Value shouldNegate = - spirv::LogicalAndOp::create(rewriter, loc, lessThan, isOdd); + + Value shouldNegate; + if (llvm::all_equal(oddMask)) { + // Every lane has odd exponent: negate iff lhs < 0. + shouldNegate = lessThan; + } else { + // Mixed parity (non-splat dense vector): AND lhs<0 with a per-element + // constant odd-mask. + auto vecType = cast(operandType); + auto maskType = VectorType::get(vecType.getShape(), rewriter.getI1Type()); + Value oddConst = spirv::ConstantOp::create( + rewriter, loc, maskType, DenseElementsAttr::get(maskType, oddMask)); + shouldNegate = + spirv::LogicalAndOp::create(rewriter, loc, lessThan, oddConst); + } + rewriter.replaceOpWithNewOp(powfOp, shouldNegate, negate, pow); return success(); diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir index 8eb533eeff2a9..08d7822d04cc1 100644 --- a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir +++ b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir @@ -183,45 +183,98 @@ func.func @ctlz_vector2(%val: vector<2xi32>) -> vector<2xi32> { return %0 : vector<2xi32> } +// Dynamic exponent: exp(y * log(x)); yields NaN for x<0. // CHECK-LABEL: @powf_scalar // CHECK-SAME: (%[[LHS:.+]]: f32, %[[RHS:.+]]: f32) func.func @powf_scalar(%lhs: f32, %rhs: f32) -> f32 { + // CHECK: %[[LOG:.+]] = spirv.GL.Log %[[LHS]] : f32 + // CHECK: %[[MUL:.+]] = spirv.FMul %[[RHS]], %[[LOG]] : f32 + // CHECK: %[[EXP:.+]] = spirv.GL.Exp %[[MUL]] : f32 + %0 = math.powf %lhs, %rhs : f32 + // CHECK: return %[[EXP]] + return %0: f32 +} + +// CHECK-LABEL: @powf_vector +func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { + // CHECK: spirv.GL.Log %{{.*}} : vector<4xf32> + // CHECK: spirv.FMul %{{.*}} : vector<4xf32> + // CHECK: spirv.GL.Exp %{{.*}} : vector<4xf32> + %0 = math.powf %lhs, %rhs : vector<4xf32> + return %0: vector<4xf32> +} + +// Constant odd integer exponent: parity is known statically, so the lowering +// drops the runtime FToS/BitwiseAnd/IEqual/LogicalAnd parity computation. +// CHECK-LABEL: @powf_const_odd_int_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_odd_int_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 3.000000e+00 : f32 + // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[LHS]] : f32 + // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 // CHECK: %[[F0:.+]] = spirv.Constant 0.000000e+00 : f32 // CHECK: %[[LT:.+]] = spirv.FOrdLessThan %[[LHS]], %[[F0]] : f32 - // CHECK: %[[F1:.+]] = spirv.Constant 1.000000e+00 : f32 - // CHECK: %[[REM:.+]] = spirv.FRem %[[RHS]], %[[F1]] : f32 - // CHECK: %[[IS_FRACTION:.+]] = spirv.FOrdNotEqual %[[REM]], %[[F0]] : f32 - // CHECK: %[[AND:.+]] = spirv.LogicalAnd %[[IS_FRACTION]], %[[LT]] : i1 - // CHECK: %[[NAN:.+]] = spirv.Constant 0x7FC00000 : f32 - // CHECK: %[[NEW_LHS:.+]] = spirv.Select %[[AND]], %[[NAN]], %[[LHS]] : i1, f32 - // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[NEW_LHS]] : f32 - // CHECK: %[[IRHS:.+]] = spirv.ConvertFToS - // CHECK: %[[CST1:.+]] = spirv.Constant 1 : i32 - // CHECK: %[[REM:.+]] = spirv.BitwiseAnd %[[IRHS]] - // CHECK: %[[ODD:.+]] = spirv.IEqual %[[REM]], %[[CST1]] : i32 - // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 // CHECK: %[[NEG:.+]] = spirv.FNegate %[[POW]] : f32 - // CHECK: %[[SNEG:.+]] = spirv.LogicalAnd %[[LT]], %[[ODD]] : i1 - // CHECK: %[[SEL:.+]] = spirv.Select %[[SNEG]], %[[NEG]], %[[POW]] : i1, f32 - %0 = math.powf %lhs, %rhs : f32 + // CHECK: %[[SEL:.+]] = spirv.Select %[[LT]], %[[NEG]], %[[POW]] : i1, f32 + %c = arith.constant 3.0 : f32 + %0 = math.powf %lhs, %c : f32 // CHECK: return %[[SEL]] return %0: f32 } -// CHECK-LABEL: @powf_vector -func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { +// Constant even integer exponent: result is non-negative, no select needed. +// CHECK-LABEL: @powf_const_even_int_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_even_int_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 4.000000e+00 : f32 + // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[LHS]] : f32 + // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 + %c = arith.constant 4.0 : f32 + %0 = math.powf %lhs, %c : f32 + // CHECK: return %[[POW]] + return %0: f32 +} + +// Constant non-integer exponent: falls into the dynamic exp(y*log(x)) path. +// CHECK-LABEL: @powf_const_frac_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_frac_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 2.500000e+00 : f32 + // CHECK: %[[LOG:.+]] = spirv.GL.Log %[[LHS]] : f32 + // CHECK: %[[MUL:.+]] = spirv.FMul %[[RHS]], %[[LOG]] : f32 + // CHECK: %[[EXP:.+]] = spirv.GL.Exp %[[MUL]] : f32 + %c = arith.constant 2.5 : f32 + %0 = math.powf %lhs, %c : f32 + // CHECK: return %[[EXP]] + return %0: f32 +} + +// Splat constant odd integer-valued vector exponent: uniform odd parity. +// CHECK-LABEL: @powf_const_odd_int_exp_vector +func.func @powf_const_odd_int_exp_vector(%lhs: vector<4xf32>) -> vector<4xf32> { + // CHECK: spirv.GL.FAbs + // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> // CHECK: spirv.FOrdLessThan - // CHECK: spirv.FRem - // CHECK: spirv.FOrdNotEqual - // CHECK: spirv.LogicalAnd + // CHECK: spirv.FNegate // CHECK: spirv.Select + %c = arith.constant dense<3.0> : vector<4xf32> + %0 = math.powf %lhs, %c : vector<4xf32> + return %0: vector<4xf32> +} + +// Mixed-parity constant integer-valued vector exponent: per-element odd-mask +// constant is materialized and AND-ed with lhs<0. +// CHECK-LABEL: @powf_const_mixed_int_exp_vector +func.func @powf_const_mixed_int_exp_vector(%lhs: vector<4xf32>) -> vector<4xf32> { // CHECK: spirv.GL.FAbs - // CHECK: spirv.BitwiseAnd %{{.*}} : vector<4xi32> - // CHECK: spirv.IEqual %{{.*}} : vector<4xi32> // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> + // CHECK: spirv.FOrdLessThan // CHECK: spirv.FNegate + // CHECK: %[[ODD:.+]] = spirv.Constant dense<[true, false, true, false]> : vector<4xi1> + // CHECK: spirv.LogicalAnd %{{.*}}, %[[ODD]] : vector<4xi1> // CHECK: spirv.Select - %0 = math.powf %lhs, %rhs : vector<4xf32> + %c = arith.constant dense<[3.0, 2.0, 5.0, 4.0]> : vector<4xf32> + %0 = math.powf %lhs, %c : vector<4xf32> return %0: vector<4xf32> } From 598acfb82d27b583e7adb1526b3cfd27f2a84041 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Sun, 17 May 2026 21:22:35 +0200 Subject: [PATCH 05/17] [sanitizer][test] Fix coverage-module-unloaded.cpp etc. on Solaris (#198164) When switching `clang++` to the default Solaris 11.4 compilation environment, XPG7 + extensions, two tests `FAIL`: ``` AddressSanitizer-i386-sunos :: TestCases/Posix/coverage-module-unloaded.cpp AddressSanitizer-i386-sunos-dynamic :: TestCases/Posix/coverage-module-unloaded.cpp SanitizerCommon-asan-i386-SunOS :: get_module_and_offset_for_pc.cpp SanitizerCommon-ubsan-i386-SunOS :: get_module_and_offset_for_pc.cpp SanitizerCommon-ubsan-x86_64-SunOS :: get_module_and_offset_for_pc.cpp ``` The failure mode is the same in both cases: the tests fail to link with `main` undefined. This happens because `` defines ``` #define SHARED 0x10 ``` for the benefit of the Solaris-specific `memcntl(2)`. This was previously hidden since `clang++` defined `_XOPEN_SOURCE=600`. This interferes with the use of `SHARED` in the tests. While I'm the first to admit that such a generic identifier is highly unfortunate, this is also true for the tests. The interface goes all the way back to Solaris 1.0/SunOS 4, thus isn't going to change. This patch fixes this by following the lead of other tests that use `SHARED_LIB` instead of `SHARED` for this purpose. Tested on `amd64-pc-solaris2.11` and `sparcv9-sun-solaris2.11`. --- .../test/asan/TestCases/Posix/coverage-module-unloaded.cpp | 6 +++--- .../TestCases/get_module_and_offset_for_pc.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp b/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp index feef8f81d70da..7c143701af6a3 100644 --- a/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp @@ -1,8 +1,8 @@ // Check that unloading a module doesn't break coverage dumping for remaining // modules. // RUN: mkdir -p %t.dir && cd %t.dir -// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib1 -fPIC -// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib2 -fPIC +// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED_LIB %s -shared -o %dynamiclib1 -fPIC +// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED_LIB %s -shared -o %dynamiclib2 -fPIC // RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %libdl -o %t.dir/exe // RUN: mkdir -p %t.tmp/coverage-module-unloaded && cd %t.tmp/coverage-module-unloaded // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t.dir/exe %dynamiclib1 %dynamiclib2 2>&1 | FileCheck %s @@ -18,7 +18,7 @@ #include #include -#ifdef SHARED +#ifdef SHARED_LIB extern "C" { void bar() { printf("bar\n"); } } diff --git a/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp b/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp index 662625e16f3e1..bfabf40fdaa86 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp @@ -1,5 +1,5 @@ // RUN: mkdir -p %t-dir -// RUN: %clangxx -DSHARED %s -shared -o %t-dir/get_module_and_offset_for_pc.so -fPIC +// RUN: %clangxx -DSHARED_LIB %s -shared -o %t-dir/get_module_and_offset_for_pc.so -fPIC // RUN: %clangxx -DSO_DIR=\"%t-dir\" -O0 %s -o %t // RUN: %run %t 2>&1 | FileCheck %s @@ -13,7 +13,7 @@ #include #include -#ifdef SHARED +#ifdef SHARED_LIB extern "C" { int foo() { return 1; } } From 0e92b557a78eddc7493ade4cfc0a3b6b93ee61cf Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Sun, 17 May 2026 21:23:22 +0200 Subject: [PATCH 06/17] [sanitizer_common] Fix sanitizer_platform_limits_solaris.cpp compilation (#198158) When switching `clang++` to the default Solaris 11.4 compilation environment, XPG7 + extensions, `sanitizer_platform_limits_solaris.cpp` fails to compile: ``` compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp:93:53: error: use of undeclared identifier 'ucontext_t'; did you mean 'ucontext_t_sz'? 93 | unsigned ucontext_t_sz(void *ctx) { return sizeof(ucontext_t); } | ^~~~~~~~~~ | ucontext_t_sz compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp:93:12: note: 'ucontext_t_sz' declared here 93 | unsigned ucontext_t_sz(void *ctx) { return sizeof(ucontext_t); } | ^ compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp:93:52: error: invalid application of 'sizeof' to a function type 93 | unsigned ucontext_t_sz(void *ctx) { return sizeof(ucontext_t); } | ^~~~~~~~~~~~ ``` Previously, `` would include `` due to a XPG4v2 requirement. Now the latter needs to be included directly. Tested on `amd64-pc-solaris2.11` and `sparcv9-sun-solaris2.11`. --- .../lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp index 7ea6134b702bf..1cc64a38aaed8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include From c497efb82a02a1dc1126bda17f9373f120df298a Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Sun, 17 May 2026 21:30:48 +0200 Subject: [PATCH 07/17] [InstCombine] Convert logical and/or with trunc nuw to i1 into bitwise ops (#198178) if it is know that `trunc nuw to i1 ` can not be poison logical and/or can be folded to bitwise ops. proof https://alive2.llvm.org/ce/z/xQ2Sj- --- .../InstCombine/InstCombineSelect.cpp | 18 ++++-- .../Transforms/InstCombine/logical-select.ll | 55 +++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a064aa1016399..15e4668671807 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3653,7 +3653,7 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal, /// already poison. For example, if ValAssumedPoison is `icmp samesign X, 10` /// and V is `icmp ne X, 5`, impliesPoisonOrCond returns true. static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V, - bool Expected) { + bool Expected, const SimplifyQuery &SQ) { if (impliesPoison(ValAssumedPoison, V)) return true; @@ -3678,6 +3678,14 @@ static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V, *RHSC2); } } + Value *A; + if (match(ValAssumedPoison, m_NUWTrunc(m_Value(A))) && + isGuaranteedNotToBePoison(A)) { + assert(ValAssumedPoison->getType()->isIntOrIntVectorTy(1)); + return computeKnownBits( + A, SQ.getWithInstruction(cast(ValAssumedPoison))) + .getMaxValue() == 1; + } return false; } @@ -3703,13 +3711,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // checks whether folding it does not convert a well-defined value into // poison. if (match(TrueVal, m_One())) { - if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false)) { + if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false, SQ)) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) && - impliesPoisonOrCond(FalseVal, B, /*Expected=*/false)) { + impliesPoisonOrCond(FalseVal, B, /*Expected=*/false, SQ)) { // (A || B) || C --> A || (B | C) Value *LOr = Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal)); if (auto *I = dyn_cast(LOr)) { @@ -3749,13 +3757,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { } if (match(FalseVal, m_Zero())) { - if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true)) { + if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true, SQ)) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) && - impliesPoisonOrCond(TrueVal, B, /*Expected=*/true)) { + impliesPoisonOrCond(TrueVal, B, /*Expected=*/true, SQ)) { // (A && B) && C --> A && (B & C) Value *LAnd = Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal)); if (auto *I = dyn_cast(LAnd)) { diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index e6de063969a6a..85e8c98455c91 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -1637,3 +1637,58 @@ define <2 x i1> @test_logical_and_icmp_samesign_vec_with_poison_tv(<2 x i8> %x) %and = select <2 x i1> %cmp1, <2 x i1> %cmp2, <2 x i1> zeroinitializer ret <2 x i1> %and } + +define i1 @test_logical_and_trunc_nuw(i1 %c, i8 noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_and_trunc_nuw( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} + +define i1 @test_logical_or_trunc_nuw(i1 %c, i8 noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_or_trunc_nuw( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret i1 [[OR]] +; + %trunc = trunc nuw i8 %x to i1 + %or = select i1 %c, i1 true, i1 %trunc + ret i1 %or +} + +define <2 x i1> @test_logical_and_trunc_nuw_vec(<2 x i1> %c, <2 x i8> noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_and_trunc_nuw_vec( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw <2 x i8> [[X:%.*]] to <2 x i1> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret <2 x i1> [[AND]] +; + %trunc = trunc nuw <2 x i8> %x to <2 x i1> + %and = select <2 x i1> %c, <2 x i1> %trunc, <2 x i1> zeroinitializer + ret <2 x i1> %and +} + +define i1 @neg_test_logical_and_trunc_nuw_no_range(i1 %c, i8 noundef %x) { +; CHECK-LABEL: @neg_test_logical_and_trunc_nuw_no_range( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C:%.*]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} + +define i1 @neg_test_logical_and_trunc_nuw_no_noundef(i1 %c, i8 range(i8 0,2) %x) { +; CHECK-LABEL: @neg_test_logical_and_trunc_nuw_no_noundef( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C:%.*]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} From df905252dbe931929cd3118630e5d13dc9d470d8 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 17 May 2026 12:31:21 -0700 Subject: [PATCH 08/17] [JTS] Readd assertion Now that VP metadata has been cleaned up a little bit, we can reenable this assertion. Reviewers: alexander-shaposhnikov, mtrofin Pull Request: https://github.com/llvm/llvm-project/pull/198141 --- llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp index da581e41dc9a8..d5696bc9ea956 100644 --- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp +++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp @@ -158,10 +158,9 @@ expandToSwitch(CallBase *CB, const JumpTableTy &JT, DomTreeUpdater &DTU, for (const auto &[G, C] : Targets) { [[maybe_unused]] auto It = GuidToCounter.insert({G, C}); - // TODO(boomanaiden154): Currently we do not assert on inserting - // duplicate GUIDs because we might have multiple zeros when the profile - // loader fails to map addresses to functions. Readd the assertion that - // we did insert once this has been fixed. + // We should always be inserting as it is verifier-enforced IR invariant + // that VP metadata does not have duplicate values. + assert(It.second); } } for (auto [Index, Func] : llvm::enumerate(JT.Funcs)) { From 856f7d4d33d425e2e993c37785d941cac9a0e3b9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 17 May 2026 20:37:52 +0100 Subject: [PATCH 09/17] [LV] Extract helper to simplify phi removal in connectEpiVectorL (NFC) (#198203) Extract the repeated edge-redirect + DomTree update pattern into a RedirectEdge lambda, and convert the separate removeIncomingValue calls for check blocks into a loop. --- .../Transforms/Vectorize/LoopVectorize.cpp | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1de70dfd09a83..f689dc12349ac 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7869,40 +7869,30 @@ static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L, assert(EPI.MainLoopIterationCountCheck && EPI.EpilogueIterationCountCheck && "expected this to be saved from the previous pass."); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, VecEpiloguePreHeader); - DTU.applyUpdates({{DominatorTree::Delete, EPI.MainLoopIterationCountCheck, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, EPI.MainLoopIterationCountCheck, - VecEpiloguePreHeader}}); + // Helper to redirect an edge from \p BB to \p VecEpilogueIterationCountCheck + // to \p NewSucc instead, updating the DomTree. + auto RedirectEdge = [&](BasicBlock *BB, BasicBlock *NewSucc) { + BB->getTerminator()->replaceUsesOfWith(VecEpilogueIterationCountCheck, + NewSucc); + DTU.applyUpdates( + {{DominatorTree::Delete, BB, VecEpilogueIterationCountCheck}, + {DominatorTree::Insert, BB, NewSucc}}); + }; + + RedirectEdge(EPI.MainLoopIterationCountCheck, VecEpiloguePreHeader); BasicBlock *ScalarPH = cast(EpiPlan.getScalarPreheader())->getIRBasicBlock(); - EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates( - {{DominatorTree::Delete, EPI.EpilogueIterationCountCheck, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, EPI.EpilogueIterationCountCheck, ScalarPH}}); + RedirectEdge(EPI.EpilogueIterationCountCheck, ScalarPH); // Adjust the terminators of runtime check blocks and phis using them. BasicBlock *SCEVCheckBlock = Checks.getSCEVChecks().second; BasicBlock *MemCheckBlock = Checks.getMemRuntimeChecks().second; - if (SCEVCheckBlock) { - SCEVCheckBlock->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates({{DominatorTree::Delete, SCEVCheckBlock, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, SCEVCheckBlock, ScalarPH}}); - } - if (MemCheckBlock) { - MemCheckBlock->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates( - {{DominatorTree::Delete, MemCheckBlock, VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, MemCheckBlock, ScalarPH}}); - } + if (SCEVCheckBlock) + RedirectEdge(SCEVCheckBlock, ScalarPH); + if (MemCheckBlock) + RedirectEdge(MemCheckBlock, ScalarPH); // The vec.epilog.iter.check block may contain Phi nodes from inductions // or reductions which merge control-flow from the latch block and the @@ -7925,11 +7915,11 @@ static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L, return EPI.EpilogueIterationCountCheck == IncB; })) continue; - Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck); - if (SCEVCheckBlock) - Phi->removeIncomingValue(SCEVCheckBlock); - if (MemCheckBlock) - Phi->removeIncomingValue(MemCheckBlock); + for (BasicBlock *BB : + {EPI.EpilogueIterationCountCheck, SCEVCheckBlock, MemCheckBlock}) { + if (BB) + Phi->removeIncomingValue(BB); + } } auto IP = VecEpiloguePreHeader->getFirstNonPHIIt(); From 46c1fa8d1759feaefae1f6f88e1768e77c017365 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 17 May 2026 21:17:42 +0100 Subject: [PATCH 10/17] [VPlan] Use ResumeForEpilogue to get epilogue vector trip count (NFC). (#198210) Use ResumeForEpilogue to look up the vector trip count instead of plain IR lookup. Also prepares for non-phi resume values. --- .../Transforms/Vectorize/LoopVectorize.cpp | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f689dc12349ac..c2d3f7755c9b0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7604,7 +7604,7 @@ preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { /// preheader of the vector epilogue loop, after created by the execution of \p /// Plan. static SmallVector preparePlanForEpilogueVectorLoop( - VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, + VPlan &MainPlan, VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel &CM, VFSelectionContext &Config, ScalarEvolution &SE) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); @@ -7614,28 +7614,30 @@ static SmallVector preparePlanForEpilogueVectorLoop( VPValue *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to start // at the resume value from the main vector loop. Find the resume value - // created during execution of the main VPlan. It must be the first phi in the - // loop preheader. Add this resume value as an offset to the canonical IV of - // the epilogue loop. + // created during execution of the main VPlan. Add this resume value as an + // offset to the canonical IV of the epilogue loop. using namespace llvm::PatternMatch; - PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin(); - for (Value *Inc : EPResumeVal->incoming_values()) { - if (match(Inc, m_SpecificInt(0))) - continue; - assert(!EPI.VectorTripCount && - "Must only have a single non-zero incoming value"); - EPI.VectorTripCount = Inc; - } - // If we didn't find a non-zero vector trip count, all incoming values - // must be zero, which also means the vector trip count is zero. Pick the - // first zero as vector trip count. - // TODO: We should not choose VF * UF so the main vector loop is known to - // be dead. - if (!EPI.VectorTripCount) { - assert(EPResumeVal->getNumIncomingValues() > 0 && - all_of(EPResumeVal->incoming_values(), match_fn(m_SpecificInt(0))) && - "all incoming values must be 0"); - EPI.VectorTripCount = EPResumeVal->getOperand(0); + VPInstruction *ResumeForEpilogue = + cast(&*MainPlan.getScalarPreheader()->getFirstNonPhi()); + Value *EPResumeVal = ResumeForEpilogue->getUnderlyingValue(); + if (auto *ResumePhi = dyn_cast(EPResumeVal)) { + for (Value *Inc : ResumePhi->incoming_values()) { + if (match(Inc, m_SpecificInt(0))) + continue; + assert(!EPI.VectorTripCount && + "Must only have a single non-zero incoming value"); + EPI.VectorTripCount = Inc; + } + // If we didn't find a non-zero vector trip count, all incoming values + // must be zero, which also means the vector trip count is zero. + if (!EPI.VectorTripCount) { + assert(ResumePhi->getNumIncomingValues() > 0 && + all_of(ResumePhi->incoming_values(), match_fn(m_SpecificInt(0))) && + "all incoming values must be 0"); + EPI.VectorTripCount = ResumePhi->getIncomingValue(0); + } + } else { + EPI.VectorTripCount = EPResumeVal; } VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); assert(all_of(IV->users(), @@ -8371,7 +8373,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, Checks, BestEpiPlan); SmallVector InstsToMove = preparePlanForEpilogueVectorLoop( - BestEpiPlan, L, ExpandedSCEVs, EPI, CM, Config, *PSE.getSE()); + BestMainPlan, BestEpiPlan, L, ExpandedSCEVs, EPI, CM, Config, + *PSE.getSE()); LVP.attachRuntimeChecks(BestEpiPlan, Checks, HasBranchWeights); LVP.executePlan( EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, From c5c8e91e1b1755ac2120e9be89f0f33e4e7a95e0 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sun, 17 May 2026 13:41:12 -0700 Subject: [PATCH 11/17] [ICP] Update comment about duplicate values in VP MD With https://github.com/llvm/llvm-project/pull/196649 deduplicating VP values and https://github.com/llvm/llvm-project/pull/193083 enforcing this, we no longer need to worry about duplicate values, zero or otherwise. Update the comment to reflect this. Reviewers: mingmingl-llvm, teresajohnson Reviewed By: mingmingl-llvm Pull Request: https://github.com/llvm/llvm-project/pull/198140 --- .../Transforms/Instrumentation/IndirectCallPromotion.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index f3cdb3518ddcb..9fe751a56df64 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -641,9 +641,9 @@ Instruction *IndirectCallPromoter::computeVTableInfos( continue; auto &Candidate = Candidates[CalleeIndexIter->second]; - // There shouldn't be duplicate GUIDs in one !prof metadata (except - // duplicated zeros), so assign counters directly won't cause overwrite or - // counter loss. + // There should never be duplicate GUIDs in one !prof metdata, as this is + // an IR invariant enforced by the verifier. Assigning counters directly + // won't cause overwrite or counter loss. Candidate.VTableGUIDAndCounts[VTableVal] = V.Count; Candidate.AddressPoints.push_back( getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); From 5f2bedca745d5efa1955369cfe352bcd09be4633 Mon Sep 17 00:00:00 2001 From: Marcos Ramirez Joos Date: Sun, 17 May 2026 22:43:47 +0200 Subject: [PATCH 12/17] Add clang warning if fp exception functions are called without appropriate flags/pragmas (#187860) Fixes https://github.com/llvm/llvm-project/issues/128239 The implementation adds warnings for floating-point exception function calls (fenv.h) made without enabling floating-point exception behavior via `-ffp-exception-behavior=maytrap/strict` or `#pragma STDC FENV_ACCESS ON`. To support recognition of all fenv.h builtins, `fexcept_t` and `fenv_t` were added as builtin types. --- clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/AST/ASTContext.h | 35 +++++++++- clang/include/clang/Basic/BuiltinHeaders.def | 1 + clang/include/clang/Basic/Builtins.td | 55 +++++++++++++++ .../clang/Basic/DiagnosticSemaKinds.td | 5 ++ clang/include/clang/Basic/TokenKinds.def | 2 + clang/include/clang/Sema/Sema.h | 4 ++ .../include/clang/Serialization/ASTBitCodes.h | 8 ++- clang/lib/AST/ASTContext.cpp | 19 ++++++ clang/lib/Sema/SemaChecking.cpp | 18 +++++ clang/lib/Sema/SemaDecl.cpp | 8 +++ clang/lib/Serialization/ASTReader.cpp | 36 ++++++++++ clang/lib/Serialization/ASTWriter.cpp | 2 + clang/test/PCH/builtins-fenv.c | 25 +++++++ clang/test/PCH/builtins-fenv.h | 18 +++++ clang/test/Sema/builtin-fenv.c | 51 ++++++++++++++ clang/test/Sema/fenv-access-implicit.c | 35 ++++++++++ clang/test/Sema/fenv-access-unevaluated.cpp | 31 +++++++++ clang/test/Sema/fenv-access.c | 68 +++++++++++++++++++ clang/utils/TableGen/ClangBuiltinsEmitter.cpp | 2 + 20 files changed, 424 insertions(+), 2 deletions(-) create mode 100644 clang/test/PCH/builtins-fenv.c create mode 100644 clang/test/PCH/builtins-fenv.h create mode 100644 clang/test/Sema/builtin-fenv.c create mode 100644 clang/test/Sema/fenv-access-implicit.c create mode 100644 clang/test/Sema/fenv-access-unevaluated.cpp create mode 100644 clang/test/Sema/fenv-access.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f5660f9670eae..0feecef6bbd4f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -506,6 +506,9 @@ Improvements to Clang's diagnostics - Added ``-Wattribute-alias`` to diagnose type mismatches between an alias and its aliased function. (#GH195550) +- Added warnings for floating-point exception function calls (fenv.h) without enabling + floating-point exception behavior via the appropriate flags or pragmas. (#GH128239) + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index b2fd522e6865c..d0a402e59ff60 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -498,6 +498,12 @@ class ASTContext : public RefCountedBase { /// The type for the C ucontext_t type. TypeDecl *ucontext_tDecl = nullptr; + /// The type for the C fexcept_t type. + TypeDecl *fexcept_tDecl = nullptr; + + /// The type for the C fenv_t type. + TypeDecl *fenv_tDecl = nullptr; + /// Type for the Block descriptor for Blocks CodeGen. /// /// Since this is only used for generation of debug info, it is not @@ -2350,6 +2356,30 @@ class ASTContext : public RefCountedBase { return QualType(); } + /// Set the type for the C fexcept_t type. + void setfexcept_tDecl(TypeDecl *fexcept_tDecl) { + this->fexcept_tDecl = fexcept_tDecl; + } + + /// Retrieve the C fexcept_t type. + QualType getfexcept_tType() const { + if (fexcept_tDecl) + return getTypeDeclType(ElaboratedTypeKeyword::None, + /*Qualifier=*/std::nullopt, fexcept_tDecl); + return QualType(); + } + + /// Set the type for the C fenv_t type. + void setfenv_tDecl(TypeDecl *fenv_tDecl) { this->fenv_tDecl = fenv_tDecl; } + + /// Retrieve the C fenv_t type. + QualType getfenv_tType() const { + if (fenv_tDecl) + return getTypeDeclType(ElaboratedTypeKeyword::None, + /*Qualifier=*/std::nullopt, fenv_tDecl); + return QualType(); + } + /// The result type of logical operations, '<', '>', '!=', etc. CanQualType getLogicalOperationType() const { return getLangOpts().CPlusPlus ? BoolTy : IntTy; @@ -2630,7 +2660,10 @@ class ASTContext : public RefCountedBase { GE_Missing_setjmp, /// Missing a type from - GE_Missing_ucontext + GE_Missing_ucontext, + + /// Missing a type from + GE_Missing_fenv }; QualType DecodeTypeStr(const char *&Str, const ASTContext &Context, diff --git a/clang/include/clang/Basic/BuiltinHeaders.def b/clang/include/clang/Basic/BuiltinHeaders.def index 23889a22769ed..f77665a2e8975 100644 --- a/clang/include/clang/Basic/BuiltinHeaders.def +++ b/clang/include/clang/Basic/BuiltinHeaders.def @@ -17,6 +17,7 @@ HEADER(BLOCKS_H, "Blocks.h") HEADER(COMPLEX_H, "complex.h") HEADER(CTYPE_H, "ctype.h") HEADER(EMMINTRIN_H, "emmintrin.h") +HEADER(FENV_H, "fenv.h") HEADER(FOUNDATION_NSOBJCRUNTIME_H, "Foundation/NSObjCRuntime.h") HEADER(IMMINTRIN_H, "immintrin.h") HEADER(INTRIN_H, "intrin.h") diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 40ec94ab75046..7b833487e23a2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4599,6 +4599,61 @@ def BlockObjectDispose : LibBuiltin<"blocks.h"> { } // FIXME: Also declare NSConcreteGlobalBlock and NSConcreteStackBlock. +def FeClearExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feclearexcept"]; + let Prototype = "int(int)"; +} + +def FeGetExceptFlag : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetexceptflag"]; + let Prototype = "int(fexcept_t*, int)"; +} + +def FeRaiseExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feraiseexcept"]; + let Prototype = "int(int)"; +} + +def FeSetExceptFlag : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetexceptflag"]; + let Prototype = "int(fexcept_t const*, int)"; +} + +def FeTestExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["fetestexcept"]; + let Prototype = "int(int)"; +} + +def FeGetRound : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetround"]; + let Prototype = "int()"; +} + +def FeSetRound : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetround"]; + let Prototype = "int(int)"; +} + +def FeGetEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetenv"]; + let Prototype = "int(fenv_t*)"; +} + +def FeHoldExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feholdexcept"]; + let Prototype = "int(fenv_t*)"; +} + +def FeSetEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetenv"]; + let Prototype = "int(fenv_t const*)"; +} + +def FeUpdateEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["feupdateenv"]; + let Prototype = "int(fenv_t const*)"; +} + def __Addressof : LangBuiltin<"CXX_LANG"> { let Spellings = ["__addressof"]; let Attributes = [FunctionWithoutBuiltinPrefix, NoThrow, Const, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d7dd20d6a45e4..c73c116cdc451 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1041,6 +1041,11 @@ def err_ptrauth_indirect_goto_addrlabel_arithmetic : Error< "%select{subtraction|addition}0 of address-of-label expressions is not " "supported with ptrauth indirect gotos">; +def warn_fe_access_without_fenv_access : Warning< + "'%0' used without enabling floating-point exception behavior; use 'pragma STDC " + "FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'">, + InGroup>; + // __ptrauth qualifier def err_ptrauth_qualifier_invalid : Error< "%select{return type|parameter type|property}1 may not be qualified with " diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index f07d8ebb75035..4bd21b4112580 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -877,6 +877,8 @@ NOTABLE_IDENTIFIER(FILE) NOTABLE_IDENTIFIER(jmp_buf) NOTABLE_IDENTIFIER(sigjmp_buf) NOTABLE_IDENTIFIER(ucontext_t) +NOTABLE_IDENTIFIER(fexcept_t) +NOTABLE_IDENTIFIER(fenv_t) NOTABLE_IDENTIFIER(float_t) NOTABLE_IDENTIFIER(double_t) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5202244cee2a7..1c8a47169bbd3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8264,6 +8264,10 @@ class Sema final : public SemaBase { return currentEvaluationContext().isUnevaluated(); } + bool isPotentiallyEvaluatedContext() const { + return currentEvaluationContext().isPotentiallyEvaluated(); + } + bool isImmediateFunctionContext() const { return currentEvaluationContext().isImmediateFunctionContext(); } diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 3c8f3ba59a07e..4bb287af687b5 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1225,7 +1225,13 @@ enum SpecialTypeIDs { SPECIAL_TYPE_OBJC_SEL_REDEFINITION = 6, /// C ucontext_t typedef type - SPECIAL_TYPE_UCONTEXT_T = 7 + SPECIAL_TYPE_UCONTEXT_T = 7, + + /// C fexcept_t typedef type + SPECIAL_TYPE_FEXCEPT_T = 8, + + /// C fenv_t typedef type + SPECIAL_TYPE_FENV_T = 9 }; /// The number of special type IDs. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bc4771aec77d1..412ba583353d2 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12849,6 +12849,25 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, case 'm': Type = Context.MFloat8Ty; break; + case 'T': + switch (*Str++) { + case 'x': { + Type = Context.getfexcept_tType(); + break; + } + case 'e': { + Type = Context.getfenv_tType(); + break; + } + default: { + llvm_unreachable("Unexpected target builtin type"); + } + } + if (Type.isNull()) { + Error = ASTContext::GE_Missing_fenv; + return {}; + } + break; } // If there are modifiers and if we're allowed to parse them, go for it. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cc834bbee23c4..03091f2ba0cfe 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3928,6 +3928,24 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (BuiltinCountedByRef(TheCall)) return ExprError(); break; + + case Builtin::BIfeclearexcept: + case Builtin::BIfegetexceptflag: + case Builtin::BIferaiseexcept: + case Builtin::BIfesetexceptflag: + case Builtin::BIfetestexcept: + case Builtin::BIfegetround: + case Builtin::BIfesetround: + case Builtin::BIfegetenv: + case Builtin::BIfeholdexcept: + case Builtin::BIfesetenv: + case Builtin::BIfeupdateenv: + if (TheCall->getFPFeaturesInEffect(getLangOpts()).getExceptionMode() == + LangOptions::FPE_Ignore && + isPotentiallyEvaluatedContext()) { + Diag(TheCall->getBeginLoc(), diag::warn_fe_access_without_fenv_access) + << FDecl->getName() << TheCall->getSourceRange(); + } } if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall)) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4b9576479e29e..51ad5ac5f9e62 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2398,6 +2398,8 @@ static StringRef getHeaderName(Builtin::Context &BuiltinInfo, unsigned ID, return "setjmp.h"; case ASTContext::GE_Missing_ucontext: return "ucontext.h"; + case ASTContext::GE_Missing_fenv: + return "fenv.h"; } llvm_unreachable("unhandled error kind"); } @@ -7018,6 +7020,12 @@ Sema::ActOnTypedefNameDecl(Scope *S, DeclContext *DC, TypedefNameDecl *NewTD, case tok::NotableIdentifierKind::ucontext_t: Context.setucontext_tDecl(NewTD); break; + case tok::NotableIdentifierKind::fexcept_t: + Context.setfexcept_tDecl(NewTD); + break; + case tok::NotableIdentifierKind::fenv_t: + Context.setfenv_tDecl(NewTD); + break; case tok::NotableIdentifierKind::float_t: case tok::NotableIdentifierKind::double_t: NewTD->addAttr(AvailableOnlyInDefaultEvalMethodAttr::Create(Context)); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0f834859e982a..5425c1da7419b 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5758,6 +5758,42 @@ void ASTReader::InitializeContext() { } } } + + if (TypeID Fexcept_t = SpecialTypes[SPECIAL_TYPE_FEXCEPT_T]) { + QualType Fexcept_tType = GetType(Fexcept_t); + if (Fexcept_tType.isNull()) { + Error("fexcept_t type is NULL"); + return; + } + + if (!Context.fexcept_tDecl) { + if (const TypedefType *Typedef = Fexcept_tType->getAs()) + Context.setfexcept_tDecl(Typedef->getDecl()); + else { + const TagType *Tag = Fexcept_tType->getAs(); + assert(Tag && "Invalid fexcept_t type in AST file"); + Context.setfexcept_tDecl(Tag->getDecl()); + } + } + } + + if (TypeID Fenv_t = SpecialTypes[SPECIAL_TYPE_FENV_T]) { + QualType Fenv_tType = GetType(Fenv_t); + if (Fenv_tType.isNull()) { + Error("fenv_t type is NULL"); + return; + } + + if (!Context.fenv_tDecl) { + if (const TypedefType *Typedef = Fenv_tType->getAs()) + Context.setfenv_tDecl(Typedef->getDecl()); + else { + const TagType *Tag = Fenv_tType->getAs(); + assert(Tag && "Invalid fenv_t type in AST file"); + Context.setfenv_tDecl(Tag->getDecl()); + } + } + } } ReadPragmaDiagnosticMappings(Context.getDiagnostics()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1970ed86589b5..c0c4aa107e200 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6158,6 +6158,8 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema *SemaPtr, StringRef isysroot, AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes); AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes); AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes); + AddTypeRef(Context, Context.getfexcept_tType(), SpecialTypes); + AddTypeRef(Context, Context.getfenv_tType(), SpecialTypes); } if (SemaPtr) diff --git a/clang/test/PCH/builtins-fenv.c b/clang/test/PCH/builtins-fenv.c new file mode 100644 index 0000000000000..72bcf4a134730 --- /dev/null +++ b/clang/test/PCH/builtins-fenv.c @@ -0,0 +1,25 @@ +// Test this without pch. +// RUN: %clang_cc1 -include %S/builtins-fenv.h -fsyntax-only -verify %s + +// Test with pch. +// RUN: %clang_cc1 -emit-pch -o %t %S/builtins-fenv.h +// RUN: %clang_cc1 -include-pch %t -fsyntax-only -verify %s + +// expected-no-diagnostics +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +void f(void) { + #pragma STDC FENV_ACCESS ON + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +} diff --git a/clang/test/PCH/builtins-fenv.h b/clang/test/PCH/builtins-fenv.h new file mode 100644 index 0000000000000..8397c270df58e --- /dev/null +++ b/clang/test/PCH/builtins-fenv.h @@ -0,0 +1,18 @@ +// Header for PCH test builtins-fenv.c + +#define FE_INVALID 1 + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); diff --git a/clang/test/Sema/builtin-fenv.c b/clang/test/Sema/builtin-fenv.c new file mode 100644 index 0000000000000..db8a5334d1073 --- /dev/null +++ b/clang/test/Sema/builtin-fenv.c @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DWRONG_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DRIGHT_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DONLY_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DNO_FEGETEXCEPTFLAG %s -ast-dump 2>&1 | FileCheck %s --check-prefixes=CHECK1 + +// tests inspired by clang/test/Sema/builtin-setjmp.c + +#ifdef __cplusplus +extern "C" { +#endif + +#if WRONG_FEXCEPT_T +typedef unsigned short int fexcept_t; +extern int fegetexceptflag(int, int); // c-warning {{incompatible redeclaration of library function 'fegetexceptflag'}} + // c-note@-1 {{'fegetexceptflag' is a builtin with type 'int (fexcept_t *, int)' (aka 'int (unsigned short *, int)')}} +#elif RIGHT_FEXCEPT_T +// c-no-diagnostics +typedef unsigned short int fexcept_t; +extern int fegetexceptflag(unsigned short int *, int); // OK, right type. +#elif ONLY_FEXCEPT_T +typedef long *fexcept_t; +#endif + +void use(void) { + #pragma STDC FENV_ACCESS ON + fegetexceptflag(0, 0); + #if NO_FEGETEXCEPTFLAG + // cxx-error@-2 {{undeclared identifier 'fegetexceptflag'}} + // c-error@-3 {{call to undeclared function 'fegetexceptflag'; ISO C99 and later do not support implicit function declarations}} + // c-warning@-4 {{declaration of built-in function 'fegetexceptflag' requires inclusion of the header }} + #elif ONLY_FEXCEPT_T + // cxx-error@-6 {{undeclared identifier 'fegetexceptflag'}} + // c-error@-7 {{call to undeclared library function 'fegetexceptflag' with type 'int (fexcept_t *, int)' (aka 'int (long **, int)'); ISO C99 and later do not support implicit function declarations}} + // c-note@-8 {{include the header or explicitly provide a declaration for 'fegetexceptflag'}} + #else + // cxx-no-diagnostics + #endif + + #ifdef NO_FEGETEXCEPTFLAG + // In this case, the regular AST dump doesn't dump the implicit declaration of 'fegetexceptflag'. + #pragma clang __debug dump fegetexceptflag + #endif +} + +// CHECK1: FunctionDecl {{.*}} used fegetexceptflag +// CHECK2: BuiltinAttr {{.*}} Implicit + + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/fenv-access-implicit.c b/clang/test/Sema/fenv-access-implicit.c new file mode 100644 index 0000000000000..0c4bd6b0eb855 --- /dev/null +++ b/clang/test/Sema/fenv-access-implicit.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +#define FE_INVALID 1 + +void test_fenv_access_undeclared(void) { + #pragma STDC FENV_ACCESS ON + feclearexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'feclearexcept'}} \ + expected-error {{call to undeclared library function 'feclearexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetexceptflag(flagp, FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fegetexceptflag'}} \ + expected-error {{call to undeclared library function 'fegetexceptflag' with type 'int (fexcept_t *, int)' (aka 'int (unsigned short *, int)'); ISO C99 and later do not support implicit function declarations}} + feraiseexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'feraiseexcept'}} \ + expected-error {{call to undeclared library function 'feraiseexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fesetexceptflag(flagp, FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fesetexceptflag'}} \ + expected-error {{call to undeclared library function 'fesetexceptflag' with type 'int (const fexcept_t *, int)' (aka 'int (const unsigned short *, int)'); ISO C99 and later do not support implicit function declarations}} + fetestexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fetestexcept'}} \ + expected-error {{call to undeclared library function 'fetestexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetround(); // expected-note {{include the header or explicitly provide a declaration for 'fegetround'}} \ + expected-error {{call to undeclared library function 'fegetround' with type 'int (void)'; ISO C99 and later do not support implicit function declarations}} + fesetround(0); // expected-note {{include the header or explicitly provide a declaration for 'fesetround'}} \ + expected-error {{call to undeclared library function 'fesetround' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'fegetenv'}} \ + expected-error {{call to undeclared library function 'fegetenv' with type 'int (fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + feholdexcept(envp); // expected-note {{include the header or explicitly provide a declaration for 'feholdexcept'}} \ + expected-error {{call to undeclared library function 'feholdexcept' with type 'int (fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + fesetenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'fesetenv'}} \ + expected-error {{call to undeclared library function 'fesetenv' with type 'int (const fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + feupdateenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'feupdateenv'}} \ + expected-error {{call to undeclared library function 'feupdateenv' with type 'int (const fenv_t *)'; ISO C99 and later do not support implicit function declarations}} +} diff --git a/clang/test/Sema/fenv-access-unevaluated.cpp b/clang/test/Sema/fenv-access-unevaluated.cpp new file mode 100644 index 0000000000000..14752ba3c377e --- /dev/null +++ b/clang/test/Sema/fenv-access-unevaluated.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); + +// expected-no-diagnostics +void test_fenv_access_unevaluated() { + decltype(::feclearexcept) a; + decltype(::fegetexceptflag) b; + decltype(::feraiseexcept) c; + decltype(::fesetexceptflag) d; + decltype(::fetestexcept) e; + decltype(::fegetround) f; + decltype(::fesetround) g; + decltype(::fegetenv) h; + decltype(::feholdexcept) i; + decltype(::fesetenv) j; + decltype(::feupdateenv) k; +} diff --git a/clang/test/Sema/fenv-access.c b/clang/test/Sema/fenv-access.c new file mode 100644 index 0000000000000..3a7b95af7ab4c --- /dev/null +++ b/clang/test/Sema/fenv-access.c @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s +// RUN: %clang_cc1 -verify -Wfenv-access -ffp-exception-behavior=maytrap -DNO_WARN %s +// RUN: %clang_cc1 -verify -Wfenv-access -ffp-exception-behavior=strict -DNO_WARN %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); + +#define FE_INVALID 1 + +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +void test_fenv_access_off(void) { +#ifdef NO_WARN + // expected-no-diagnostics + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +#else + feclearexcept(FE_INVALID); // expected-warning {{'feclearexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetexceptflag(flagp, FE_INVALID); // expected-warning {{'fegetexceptflag' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feraiseexcept(FE_INVALID); // expected-warning {{'feraiseexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetexceptflag(flagp, FE_INVALID); // expected-warning {{'fesetexceptflag' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fetestexcept(FE_INVALID); // expected-warning {{'fetestexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetround(); // expected-warning {{'fegetround' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetround(0); // expected-warning {{'fesetround' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetenv(envp); // expected-warning {{'fegetenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feholdexcept(envp); // expected-warning {{'feholdexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetenv(envp); // expected-warning {{'fesetenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feupdateenv(envp); // expected-warning {{'feupdateenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} +#endif +} + +void test_fenv_access_on(void) { + #pragma STDC FENV_ACCESS ON + fesetround(0); + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +} diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp index c2e38c0d6aeb8..2394c3b299c81 100644 --- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp +++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp @@ -376,6 +376,8 @@ class PrototypeParser { .Case("uint64_t", "UWi") .Case("void", "v") .Case("wchar_t", "w") + .Case("fexcept_t", "Tx") + .Case("fenv_t", "Te") .Case("...", ".") .Default("error"); if (ReturnTypeVal == "error") From 3a1206a7551a7d071f3105bd9a12e8a8169f8e6f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 17 May 2026 21:48:21 +0100 Subject: [PATCH 13/17] [LV] Add find-last-iv test with blend (NFC). (#198214) Add test cases showing functional change for https://github.com/llvm/llvm-project/pull/194729. --- .../LoopVectorize/epilog-iv-select-cmp.ll | 80 ++++++++++++++++++- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 96e74212dce4e..0a98a895f3223 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -epilogue-vectorization-force-VF=4 -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -epilogue-vectorization-force-VF=4 -force-target-supports-masked-memory-ops -S < %s | FileCheck %s define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-LABEL: define i64 @select_icmp_const( @@ -908,3 +908,79 @@ loop: exit: ret i64 %sel } + + +define i32 @predicated_iv_select(ptr %A) { +; CHECK-LABEL: define i32 @predicated_iv_select( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[VECTOR_MAIN_LOOP_ITER_CHECK:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi <8 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[WIDE_LOAD]], splat (i32 -1) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[VEC_IND]], ptr align 4 [[TMP0]], <8 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5]] = select i1 [[TMP4]], <8 x i1> [[TMP1]], <8 x i1> [[TMP7]] +; CHECK-NEXT: [[TMP6]] = select i1 [[TMP4]], <8 x i32> [[VEC_IND]], <8 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1104 +; CHECK-NEXT: br i1 [[TMP2]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> [[TMP6]], <8 x i1> [[TMP5]], i32 -1) +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1104, %[[VEC_EPILOG_VECTOR_BODY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[TMP8]], %[[VEC_EPILOG_VECTOR_BODY]] ], [ [[LAST_1:%.*]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP15]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label %[[LOOP_THEN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV]] to i32 +; CHECK-NEXT: store i32 [[T]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[LAST_1]] = phi i32 [ [[T]], %[[LOOP_THEN]] ], [ [[RED]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1111 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[LAST_1_LCSSA:%.*]] = phi i32 [ [[LAST_1]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: ret i32 [[LAST_1_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %red = phi i32 [ -1, %entry ], [ %last.1, %loop.latch ] + %gep.A = getelementptr inbounds nuw i32, ptr %A, i64 %iv + %0 = load i32, ptr %gep.A, align 4 + %cmp1 = icmp sgt i32 %0, -1 + br i1 %cmp1, label %loop.then, label %loop.latch + +loop.then: + %t = trunc nuw nsw i64 %iv to i32 + store i32 %t, ptr %gep.A, align 4 + br label %loop.latch + +loop.latch: + %last.1 = phi i32 [ %t, %loop.then ], [ %red, %loop.header ] + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1111 + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %last.1 +} From 6a14487429c48cbdc0cf5b76e160a59fc5e1fa82 Mon Sep 17 00:00:00 2001 From: Anshul Nigham Date: Sun, 17 May 2026 14:01:51 -0700 Subject: [PATCH 14/17] [NewPM] Add newpm port for AArch64LowerHomogeneousPrologEpilog (#197606) --- llvm/lib/Target/AArch64/AArch64.h | 8 ++- .../AArch64LowerHomogeneousPrologEpilog.cpp | 50 ++++++++++++------- .../Target/AArch64/AArch64PassRegistry.def | 1 + .../Target/AArch64/AArch64TargetMachine.cpp | 2 +- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index a8fc28944af34..3ef6e693a4076 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -155,7 +155,7 @@ void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &); void initializeAArch64DeadRegisterDefinitionsLegacyPass(PassRegistry &); void initializeAArch64ExpandPseudoLegacyPass(PassRegistry &); void initializeAArch64LoadStoreOptLegacyPass(PassRegistry &); -void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); +void initializeAArch64LowerHomogeneousPrologEpilogLegacyPass(PassRegistry &); void initializeAArch64CodeLayoutOptPass(PassRegistry &); void initializeAArch64MIPeepholeOptLegacyPass(PassRegistry &); void initializeAArch64O0PreLegalizerCombinerLegacyPass(PassRegistry &); @@ -317,6 +317,12 @@ class AArch64ConditionalComparesPass MachineFunctionAnalysisManager &MFAM); }; +class AArch64LowerHomogeneousPrologEpilogPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp index d69f12e7c0a7c..23fef847a75f1 100644 --- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp +++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp @@ -10,11 +10,13 @@ // //===----------------------------------------------------------------------===// +#include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64InstPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -23,6 +25,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include #include @@ -39,11 +42,11 @@ static cl::opt FrameHelperSizeThreshold( namespace { -class AArch64LowerHomogeneousPE { +class AArch64LowerHomogeneousPrologEpilogImpl { public: const AArch64InstrInfo *TII; - AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) + AArch64LowerHomogeneousPrologEpilogImpl(Module *M, MachineModuleInfo *MMI) : M(M), MMI(MMI) {} bool run(); @@ -69,11 +72,11 @@ class AArch64LowerHomogeneousPE { MachineBasicBlock::iterator &NextMBBI); }; -class AArch64LowerHomogeneousPrologEpilog : public ModulePass { +class AArch64LowerHomogeneousPrologEpilogLegacy : public ModulePass { public: static char ID; - AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {} + AArch64LowerHomogeneousPrologEpilogLegacy() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); @@ -89,22 +92,34 @@ class AArch64LowerHomogeneousPrologEpilog : public ModulePass { } // end anonymous namespace -char AArch64LowerHomogeneousPrologEpilog::ID = 0; +char AArch64LowerHomogeneousPrologEpilogLegacy::ID = 0; -INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, +INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilogLegacy, "aarch64-lower-homogeneous-prolog-epilog", AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) -bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { +bool AArch64LowerHomogeneousPrologEpilogLegacy::runOnModule(Module &M) { if (skipModule(M)) return false; MachineModuleInfo *MMI = &getAnalysis().getMMI(); - return AArch64LowerHomogeneousPE(&M, MMI).run(); + return AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run(); } -bool AArch64LowerHomogeneousPE::run() { +PreservedAnalyses +AArch64LowerHomogeneousPrologEpilogPass::run(Module &M, + ModuleAnalysisManager &MAM) { + MachineModuleInfo *MMI = &MAM.getResult(M).getMMI(); + bool Changed = AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run(); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +bool AArch64LowerHomogeneousPrologEpilogImpl::run() { bool Changed = false; for (auto &F : *M) { if (F.empty()) @@ -455,7 +470,7 @@ static bool shouldUseFrameHelper(MachineBasicBlock &MBB, /// ldp x29, x30, [sp, #32] /// ldp x20, x19, [sp, #16] /// ldp x22, x21, [sp], #48 -bool AArch64LowerHomogeneousPE::lowerEpilog( +bool AArch64LowerHomogeneousPrologEpilogImpl::lowerEpilog( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { auto &MF = *MBB.getParent(); @@ -545,7 +560,7 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( /// stp x22, x21, [sp, #-48]! /// stp x20, x19, [sp, #16] /// stp x29, x30, [sp, #32] -bool AArch64LowerHomogeneousPE::lowerProlog( +bool AArch64LowerHomogeneousPrologEpilogImpl::lowerProlog( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { auto &MF = *MBB.getParent(); @@ -626,9 +641,9 @@ bool AArch64LowerHomogeneousPE::lowerProlog( /// @param MBBI current instruction iterator /// @param NextMBBI next instruction iterator which can be updated /// @return True when IR is changed. -bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); switch (Opcode) { @@ -642,7 +657,7 @@ bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, return false; } -bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -655,7 +670,8 @@ bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { return Modified; } -bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMachineFunction( + MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); bool Modified = false; @@ -665,5 +681,5 @@ bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { } ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { - return new AArch64LowerHomogeneousPrologEpilog(); + return new AArch64LowerHomogeneousPrologEpilogLegacy(); } diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def index de1b5834112e7..e3e47b12a9ec3 100644 --- a/llvm/lib/Target/AArch64/AArch64PassRegistry.def +++ b/llvm/lib/Target/AArch64/AArch64PassRegistry.def @@ -16,6 +16,7 @@ #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("aarch64-lower-homogeneous-prolog-epilog", AArch64LowerHomogeneousPrologEpilogPass()) #undef MODULE_PASS #ifndef FUNCTION_PASS diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 57929316954d6..b8cc69f7569d6 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -277,7 +277,7 @@ LLVMInitializeAArch64Target() { initializeAArch64SLSHardeningLegacyPass(PR); initializeAArch64StackTaggingPass(PR); initializeAArch64StackTaggingPreRALegacyPass(PR); - initializeAArch64LowerHomogeneousPrologEpilogPass(PR); + initializeAArch64LowerHomogeneousPrologEpilogLegacyPass(PR); initializeAArch64DAGToDAGISelLegacyPass(PR); initializeAArch64CondBrTuningPass(PR); initializeAArch64Arm64ECCallLoweringPass(PR); From 938211b6006b29f26e2803c565c03d9fc67ae8c1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 17 May 2026 22:12:46 +0100 Subject: [PATCH 15/17] [LV] Move isMoreProfitable to LoopVectorizationPlanner.cpp (NFC). (#195269) isMoreProfitable does not depend on anything in LoopVectorize.cpp, move it to the recently added LoopVectorizationPlanner.cpp. PR: https://github.com/llvm/llvm-project/pull/195269 --- .../Vectorize/LoopVectorizationPlanner.cpp | 90 +++++++++++++++++++ .../Transforms/Vectorize/LoopVectorize.cpp | 90 ------------------- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp index c6997a6c8e717..93d06172ae75f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp @@ -623,6 +623,96 @@ void VFSelectionContext::collectInLoopReductions() { } } +bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, + const VectorizationFactor &B, + const unsigned MaxTripCount, + bool HasTail, + bool IsEpilogue) const { + InstructionCost CostA = A.Cost; + InstructionCost CostB = B.Cost; + + // When there is a hint to always prefer scalable vectors, honour that hint. + if (Hints.isScalableVectorizationAlwaysPreferred()) + if (A.Width.isScalable() && CostA.isValid() && !B.Width.isScalable() && + !B.Width.isScalar()) + return true; + + // Improve estimate for the vector width if it is scalable. + unsigned EstimatedWidthA = A.Width.getKnownMinValue(); + unsigned EstimatedWidthB = B.Width.getKnownMinValue(); + if (std::optional VScale = Config.getVScaleForTuning()) { + if (A.Width.isScalable()) + EstimatedWidthA *= *VScale; + if (B.Width.isScalable()) + EstimatedWidthB *= *VScale; + } + + // When optimizing for size choose whichever is smallest, which will be the + // one with the smallest cost for the whole loop. On a tie pick the larger + // vector width, on the assumption that throughput will be greater. + if (Config.CostKind == TTI::TCK_CodeSize) + return CostA < CostB || + (CostA == CostB && EstimatedWidthA > EstimatedWidthB); + + // Assume vscale may be larger than 1 (or the value being tuned for), + // so that scalable vectorization is slightly favorable over fixed-width + // vectorization. + bool PreferScalable = !TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) && + A.Width.isScalable() && !B.Width.isScalable(); + + auto CmpFn = [PreferScalable](const InstructionCost &LHS, + const InstructionCost &RHS) { + return PreferScalable ? LHS <= RHS : LHS < RHS; + }; + + // To avoid the need for FP division: + // (CostA / EstimatedWidthA) < (CostB / EstimatedWidthB) + // <=> (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA) + bool LowerCostWithoutTC = + CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA); + if (!MaxTripCount) + return LowerCostWithoutTC; + + auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF, + InstructionCost VectorCost, + InstructionCost ScalarCost) { + // If the trip count is a known (possibly small) constant, the trip count + // will be rounded up to an integer number of iterations under + // FoldTailByMasking. The total cost in that case will be + // VecCost*ceil(TripCount/VF). When not folding the tail, the total + // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be + // some extra overheads, but for the purpose of comparing the costs of + // different VFs we can use this to compare the total loop-body cost + // expected after vectorization. + if (HasTail) + return VectorCost * (MaxTripCount / VF) + + ScalarCost * (MaxTripCount % VF); + return VectorCost * divideCeil(MaxTripCount, VF); + }; + + auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost); + auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost); + bool LowerCostWithTC = CmpFn(RTCostA, RTCostB); + LLVM_DEBUG(if (LowerCostWithTC != LowerCostWithoutTC) { + dbgs() << "LV: VF " << (LowerCostWithTC ? A.Width : B.Width) + << " has lower cost than VF " + << (LowerCostWithTC ? B.Width : A.Width) + << " when taking the cost of the remaining scalar loop iterations " + "into consideration for a maximum trip count of " + << MaxTripCount << ".\n"; + }); + return LowerCostWithTC; +} + +bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, + const VectorizationFactor &B, + bool HasTail, + bool IsEpilogue) const { + const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount(); + return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount, HasTail, + IsEpilogue); +} + // TODO: we could return a pair of values that specify the max VF and // min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of // `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c2d3f7755c9b0..4f9ab60f1c526 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3091,96 +3091,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return FixedScalableVFPair::getNone(); } -bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, - const VectorizationFactor &B, - const unsigned MaxTripCount, - bool HasTail, - bool IsEpilogue) const { - InstructionCost CostA = A.Cost; - InstructionCost CostB = B.Cost; - - // When there is a hint to always prefer scalable vectors, honour that hint. - if (Hints.isScalableVectorizationAlwaysPreferred()) - if (A.Width.isScalable() && CostA.isValid() && !B.Width.isScalable() && - !B.Width.isScalar()) - return true; - - // Improve estimate for the vector width if it is scalable. - unsigned EstimatedWidthA = A.Width.getKnownMinValue(); - unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (std::optional VScale = Config.getVScaleForTuning()) { - if (A.Width.isScalable()) - EstimatedWidthA *= *VScale; - if (B.Width.isScalable()) - EstimatedWidthB *= *VScale; - } - - // When optimizing for size choose whichever is smallest, which will be the - // one with the smallest cost for the whole loop. On a tie pick the larger - // vector width, on the assumption that throughput will be greater. - if (Config.CostKind == TTI::TCK_CodeSize) - return CostA < CostB || - (CostA == CostB && EstimatedWidthA > EstimatedWidthB); - - // Assume vscale may be larger than 1 (or the value being tuned for), - // so that scalable vectorization is slightly favorable over fixed-width - // vectorization. - bool PreferScalable = !TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) && - A.Width.isScalable() && !B.Width.isScalable(); - - auto CmpFn = [PreferScalable](const InstructionCost &LHS, - const InstructionCost &RHS) { - return PreferScalable ? LHS <= RHS : LHS < RHS; - }; - - // To avoid the need for FP division: - // (CostA / EstimatedWidthA) < (CostB / EstimatedWidthB) - // <=> (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA) - bool LowerCostWithoutTC = - CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA); - if (!MaxTripCount) - return LowerCostWithoutTC; - - auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF, - InstructionCost VectorCost, - InstructionCost ScalarCost) { - // If the trip count is a known (possibly small) constant, the trip count - // will be rounded up to an integer number of iterations under - // FoldTailByMasking. The total cost in that case will be - // VecCost*ceil(TripCount/VF). When not folding the tail, the total - // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be - // some extra overheads, but for the purpose of comparing the costs of - // different VFs we can use this to compare the total loop-body cost - // expected after vectorization. - if (HasTail) - return VectorCost * (MaxTripCount / VF) + - ScalarCost * (MaxTripCount % VF); - return VectorCost * divideCeil(MaxTripCount, VF); - }; - - auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost); - auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost); - bool LowerCostWithTC = CmpFn(RTCostA, RTCostB); - LLVM_DEBUG(if (LowerCostWithTC != LowerCostWithoutTC) { - dbgs() << "LV: VF " << (LowerCostWithTC ? A.Width : B.Width) - << " has lower cost than VF " - << (LowerCostWithTC ? B.Width : A.Width) - << " when taking the cost of the remaining scalar loop iterations " - "into consideration for a maximum trip count of " - << MaxTripCount << ".\n"; - }); - return LowerCostWithTC; -} - -bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, - const VectorizationFactor &B, - bool HasTail, - bool IsEpilogue) const { - const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount(); - return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount, HasTail, - IsEpilogue); -} - void LoopVectorizationPlanner::emitInvalidCostRemarks( OptimizationRemarkEmitter *ORE) { using RecipeVFPair = std::pair; From 3d3f4be0f13d69d46914c330895c9d5eff48a8d1 Mon Sep 17 00:00:00 2001 From: Andrei Sabalenka Date: Mon, 18 May 2026 01:19:05 +0300 Subject: [PATCH 16/17] [compiler-rt] Fix StackDepot benchmark thread barrier (#197633) Use Param.Threads (number of worker threads) as barrier threshold instead of Param.UniqueThreads (boolean that controls input generation). This also silences [-Wbool-integral-comparison](https://github.com/llvm/llvm-project/pull/194180) warning I'm working on. --- .../lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp index 02833888747ac..e6e400bb1fef7 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp @@ -174,11 +174,11 @@ class StackDepotBenchmark // '--gtest_filter=*Benchmark*' TEST_P(StackDepotBenchmark, DISABLED_Benchmark) { auto Param = GetParam(); - std::atomic here = {}; + std::atomic here = {}; auto thread = [&](int idx) { here++; - while (here < Param.UniqueThreads) std::this_thread::yield(); + while (here < Param.Threads) std::this_thread::yield(); std::vector frames(64); for (int r = 0; r < Param.RepeatPerThread; ++r) { From 8f740a3cb68c30eb3a276fbc3b9cf460a196c217 Mon Sep 17 00:00:00 2001 From: Zeyi Xu Date: Mon, 18 May 2026 07:15:55 +0800 Subject: [PATCH 17/17] [clang-tidy] Fix crash in misc-static-initialization-cycle (#198155) This commit fixes `misc-static-initialization-cycle` crashing on `catch (...)`. Catch-all handlers have no exception declaration, so traversal of `CXXCatchStmt` can call `TraverseDecl(nullptr)`. The check previously passed that null pointer to `DeclContext::containsDecl`. This commit fixes the problem by adding a null guard. Closes #198150 --- .../clang-tidy/misc/StaticInitializationCycleCheck.cpp | 2 +- .../checkers/misc/static-initialization-cycle.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp b/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp index eb230983c8a7a..3d1fc88e7233a 100644 --- a/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp @@ -193,7 +193,7 @@ class VarUseCollector : public DynamicRecursiveASTVisitor { } bool TraverseAttr(Attr *At) override { return true; } bool TraverseDecl(Decl *D) override { - if (DC && DC->containsDecl(D)) + if (D && DC && DC->containsDecl(D)) return DynamicRecursiveASTVisitor::TraverseDecl(D); return true; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp index 2e5af81b6af8c..c17a87758e243 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp @@ -120,6 +120,14 @@ int f1() { int S::A = f1(); } +namespace catch_all_handler { +void f() { + try { + } catch (...) { + } +} +} // catch_all_handler + namespace recursive_calls { int f2(); int f1() {