diff --git a/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp b/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp index eb230983c8a7a..3d1fc88e7233a 100644 --- a/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/StaticInitializationCycleCheck.cpp @@ -193,7 +193,7 @@ class VarUseCollector : public DynamicRecursiveASTVisitor { } bool TraverseAttr(Attr *At) override { return true; } bool TraverseDecl(Decl *D) override { - if (DC && DC->containsDecl(D)) + if (D && DC && DC->containsDecl(D)) return DynamicRecursiveASTVisitor::TraverseDecl(D); return true; } diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp index 2e5af81b6af8c..c17a87758e243 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/static-initialization-cycle.cpp @@ -120,6 +120,14 @@ int f1() { int S::A = f1(); } +namespace catch_all_handler { +void f() { + try { + } catch (...) { + } +} +} // catch_all_handler + namespace recursive_calls { int f2(); int f1() { diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f5660f9670eae..0feecef6bbd4f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -506,6 +506,9 @@ Improvements to Clang's diagnostics - Added ``-Wattribute-alias`` to diagnose type mismatches between an alias and its aliased function. (#GH195550) +- Added warnings for floating-point exception function calls (fenv.h) without enabling + floating-point exception behavior via the appropriate flags or pragmas. (#GH128239) + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index b2fd522e6865c..d0a402e59ff60 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -498,6 +498,12 @@ class ASTContext : public RefCountedBase { /// The type for the C ucontext_t type. TypeDecl *ucontext_tDecl = nullptr; + /// The type for the C fexcept_t type. + TypeDecl *fexcept_tDecl = nullptr; + + /// The type for the C fenv_t type. + TypeDecl *fenv_tDecl = nullptr; + /// Type for the Block descriptor for Blocks CodeGen. /// /// Since this is only used for generation of debug info, it is not @@ -2350,6 +2356,30 @@ class ASTContext : public RefCountedBase { return QualType(); } + /// Set the type for the C fexcept_t type. + void setfexcept_tDecl(TypeDecl *fexcept_tDecl) { + this->fexcept_tDecl = fexcept_tDecl; + } + + /// Retrieve the C fexcept_t type. + QualType getfexcept_tType() const { + if (fexcept_tDecl) + return getTypeDeclType(ElaboratedTypeKeyword::None, + /*Qualifier=*/std::nullopt, fexcept_tDecl); + return QualType(); + } + + /// Set the type for the C fenv_t type. + void setfenv_tDecl(TypeDecl *fenv_tDecl) { this->fenv_tDecl = fenv_tDecl; } + + /// Retrieve the C fenv_t type. + QualType getfenv_tType() const { + if (fenv_tDecl) + return getTypeDeclType(ElaboratedTypeKeyword::None, + /*Qualifier=*/std::nullopt, fenv_tDecl); + return QualType(); + } + /// The result type of logical operations, '<', '>', '!=', etc. CanQualType getLogicalOperationType() const { return getLangOpts().CPlusPlus ? BoolTy : IntTy; @@ -2630,7 +2660,10 @@ class ASTContext : public RefCountedBase { GE_Missing_setjmp, /// Missing a type from - GE_Missing_ucontext + GE_Missing_ucontext, + + /// Missing a type from + GE_Missing_fenv }; QualType DecodeTypeStr(const char *&Str, const ASTContext &Context, diff --git a/clang/include/clang/Basic/BuiltinHeaders.def b/clang/include/clang/Basic/BuiltinHeaders.def index 23889a22769ed..f77665a2e8975 100644 --- a/clang/include/clang/Basic/BuiltinHeaders.def +++ b/clang/include/clang/Basic/BuiltinHeaders.def @@ -17,6 +17,7 @@ HEADER(BLOCKS_H, "Blocks.h") HEADER(COMPLEX_H, "complex.h") HEADER(CTYPE_H, "ctype.h") HEADER(EMMINTRIN_H, "emmintrin.h") +HEADER(FENV_H, "fenv.h") HEADER(FOUNDATION_NSOBJCRUNTIME_H, "Foundation/NSObjCRuntime.h") HEADER(IMMINTRIN_H, "immintrin.h") HEADER(INTRIN_H, "intrin.h") diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 40ec94ab75046..7b833487e23a2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4599,6 +4599,61 @@ def BlockObjectDispose : LibBuiltin<"blocks.h"> { } // FIXME: Also declare NSConcreteGlobalBlock and NSConcreteStackBlock. +def FeClearExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feclearexcept"]; + let Prototype = "int(int)"; +} + +def FeGetExceptFlag : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetexceptflag"]; + let Prototype = "int(fexcept_t*, int)"; +} + +def FeRaiseExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feraiseexcept"]; + let Prototype = "int(int)"; +} + +def FeSetExceptFlag : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetexceptflag"]; + let Prototype = "int(fexcept_t const*, int)"; +} + +def FeTestExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["fetestexcept"]; + let Prototype = "int(int)"; +} + +def FeGetRound : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetround"]; + let Prototype = "int()"; +} + +def FeSetRound : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetround"]; + let Prototype = "int(int)"; +} + +def FeGetEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["fegetenv"]; + let Prototype = "int(fenv_t*)"; +} + +def FeHoldExcept : LibBuiltin<"fenv.h"> { + let Spellings = ["feholdexcept"]; + let Prototype = "int(fenv_t*)"; +} + +def FeSetEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["fesetenv"]; + let Prototype = "int(fenv_t const*)"; +} + +def FeUpdateEnv : LibBuiltin<"fenv.h"> { + let Spellings = ["feupdateenv"]; + let Prototype = "int(fenv_t const*)"; +} + def __Addressof : LangBuiltin<"CXX_LANG"> { let Spellings = ["__addressof"]; let Attributes = [FunctionWithoutBuiltinPrefix, NoThrow, Const, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d7dd20d6a45e4..c73c116cdc451 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -1041,6 +1041,11 @@ def err_ptrauth_indirect_goto_addrlabel_arithmetic : Error< "%select{subtraction|addition}0 of address-of-label expressions is not " "supported with ptrauth indirect gotos">; +def warn_fe_access_without_fenv_access : Warning< + "'%0' used without enabling floating-point exception behavior; use 'pragma STDC " + "FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'">, + InGroup>; + // __ptrauth qualifier def err_ptrauth_qualifier_invalid : Error< "%select{return type|parameter type|property}1 may not be qualified with " diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index f07d8ebb75035..4bd21b4112580 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -877,6 +877,8 @@ NOTABLE_IDENTIFIER(FILE) NOTABLE_IDENTIFIER(jmp_buf) NOTABLE_IDENTIFIER(sigjmp_buf) NOTABLE_IDENTIFIER(ucontext_t) +NOTABLE_IDENTIFIER(fexcept_t) +NOTABLE_IDENTIFIER(fenv_t) NOTABLE_IDENTIFIER(float_t) NOTABLE_IDENTIFIER(double_t) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5202244cee2a7..1c8a47169bbd3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8264,6 +8264,10 @@ class Sema final : public SemaBase { return currentEvaluationContext().isUnevaluated(); } + bool isPotentiallyEvaluatedContext() const { + return currentEvaluationContext().isPotentiallyEvaluated(); + } + bool isImmediateFunctionContext() const { return currentEvaluationContext().isImmediateFunctionContext(); } diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 3c8f3ba59a07e..4bb287af687b5 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1225,7 +1225,13 @@ enum SpecialTypeIDs { SPECIAL_TYPE_OBJC_SEL_REDEFINITION = 6, /// C ucontext_t typedef type - SPECIAL_TYPE_UCONTEXT_T = 7 + SPECIAL_TYPE_UCONTEXT_T = 7, + + /// C fexcept_t typedef type + SPECIAL_TYPE_FEXCEPT_T = 8, + + /// C fenv_t typedef type + SPECIAL_TYPE_FENV_T = 9 }; /// The number of special type IDs. diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index bc4771aec77d1..412ba583353d2 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12849,6 +12849,25 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, case 'm': Type = Context.MFloat8Ty; break; + case 'T': + switch (*Str++) { + case 'x': { + Type = Context.getfexcept_tType(); + break; + } + case 'e': { + Type = Context.getfenv_tType(); + break; + } + default: { + llvm_unreachable("Unexpected target builtin type"); + } + } + if (Type.isNull()) { + Error = ASTContext::GE_Missing_fenv; + return {}; + } + break; } // If there are modifiers and if we're allowed to parse them, go for it. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index cc834bbee23c4..03091f2ba0cfe 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3928,6 +3928,24 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (BuiltinCountedByRef(TheCall)) return ExprError(); break; + + case Builtin::BIfeclearexcept: + case Builtin::BIfegetexceptflag: + case Builtin::BIferaiseexcept: + case Builtin::BIfesetexceptflag: + case Builtin::BIfetestexcept: + case Builtin::BIfegetround: + case Builtin::BIfesetround: + case Builtin::BIfegetenv: + case Builtin::BIfeholdexcept: + case Builtin::BIfesetenv: + case Builtin::BIfeupdateenv: + if (TheCall->getFPFeaturesInEffect(getLangOpts()).getExceptionMode() == + LangOptions::FPE_Ignore && + isPotentiallyEvaluatedContext()) { + Diag(TheCall->getBeginLoc(), diag::warn_fe_access_without_fenv_access) + << FDecl->getName() << TheCall->getSourceRange(); + } } if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall)) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 4b9576479e29e..51ad5ac5f9e62 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2398,6 +2398,8 @@ static StringRef getHeaderName(Builtin::Context &BuiltinInfo, unsigned ID, return "setjmp.h"; case ASTContext::GE_Missing_ucontext: return "ucontext.h"; + case ASTContext::GE_Missing_fenv: + return "fenv.h"; } llvm_unreachable("unhandled error kind"); } @@ -7018,6 +7020,12 @@ Sema::ActOnTypedefNameDecl(Scope *S, DeclContext *DC, TypedefNameDecl *NewTD, case tok::NotableIdentifierKind::ucontext_t: Context.setucontext_tDecl(NewTD); break; + case tok::NotableIdentifierKind::fexcept_t: + Context.setfexcept_tDecl(NewTD); + break; + case tok::NotableIdentifierKind::fenv_t: + Context.setfenv_tDecl(NewTD); + break; case tok::NotableIdentifierKind::float_t: case tok::NotableIdentifierKind::double_t: NewTD->addAttr(AvailableOnlyInDefaultEvalMethodAttr::Create(Context)); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 0f834859e982a..5425c1da7419b 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5758,6 +5758,42 @@ void ASTReader::InitializeContext() { } } } + + if (TypeID Fexcept_t = SpecialTypes[SPECIAL_TYPE_FEXCEPT_T]) { + QualType Fexcept_tType = GetType(Fexcept_t); + if (Fexcept_tType.isNull()) { + Error("fexcept_t type is NULL"); + return; + } + + if (!Context.fexcept_tDecl) { + if (const TypedefType *Typedef = Fexcept_tType->getAs()) + Context.setfexcept_tDecl(Typedef->getDecl()); + else { + const TagType *Tag = Fexcept_tType->getAs(); + assert(Tag && "Invalid fexcept_t type in AST file"); + Context.setfexcept_tDecl(Tag->getDecl()); + } + } + } + + if (TypeID Fenv_t = SpecialTypes[SPECIAL_TYPE_FENV_T]) { + QualType Fenv_tType = GetType(Fenv_t); + if (Fenv_tType.isNull()) { + Error("fenv_t type is NULL"); + return; + } + + if (!Context.fenv_tDecl) { + if (const TypedefType *Typedef = Fenv_tType->getAs()) + Context.setfenv_tDecl(Typedef->getDecl()); + else { + const TagType *Tag = Fenv_tType->getAs(); + assert(Tag && "Invalid fenv_t type in AST file"); + Context.setfenv_tDecl(Tag->getDecl()); + } + } + } } ReadPragmaDiagnosticMappings(Context.getDiagnostics()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1970ed86589b5..c0c4aa107e200 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6158,6 +6158,8 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema *SemaPtr, StringRef isysroot, AddTypeRef(Context, Context.ObjCClassRedefinitionType, SpecialTypes); AddTypeRef(Context, Context.ObjCSelRedefinitionType, SpecialTypes); AddTypeRef(Context, Context.getucontext_tType(), SpecialTypes); + AddTypeRef(Context, Context.getfexcept_tType(), SpecialTypes); + AddTypeRef(Context, Context.getfenv_tType(), SpecialTypes); } if (SemaPtr) diff --git a/clang/test/PCH/builtins-fenv.c b/clang/test/PCH/builtins-fenv.c new file mode 100644 index 0000000000000..72bcf4a134730 --- /dev/null +++ b/clang/test/PCH/builtins-fenv.c @@ -0,0 +1,25 @@ +// Test this without pch. +// RUN: %clang_cc1 -include %S/builtins-fenv.h -fsyntax-only -verify %s + +// Test with pch. +// RUN: %clang_cc1 -emit-pch -o %t %S/builtins-fenv.h +// RUN: %clang_cc1 -include-pch %t -fsyntax-only -verify %s + +// expected-no-diagnostics +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +void f(void) { + #pragma STDC FENV_ACCESS ON + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +} diff --git a/clang/test/PCH/builtins-fenv.h b/clang/test/PCH/builtins-fenv.h new file mode 100644 index 0000000000000..8397c270df58e --- /dev/null +++ b/clang/test/PCH/builtins-fenv.h @@ -0,0 +1,18 @@ +// Header for PCH test builtins-fenv.c + +#define FE_INVALID 1 + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); diff --git a/clang/test/Sema/builtin-fenv.c b/clang/test/Sema/builtin-fenv.c new file mode 100644 index 0000000000000..db8a5334d1073 --- /dev/null +++ b/clang/test/Sema/builtin-fenv.c @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DWRONG_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DRIGHT_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DONLY_FEXCEPT_T %s -ast-dump | FileCheck %s --check-prefixes=CHECK1,CHECK2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -verify=c,expected -DNO_FEGETEXCEPTFLAG %s -ast-dump 2>&1 | FileCheck %s --check-prefixes=CHECK1 + +// tests inspired by clang/test/Sema/builtin-setjmp.c + +#ifdef __cplusplus +extern "C" { +#endif + +#if WRONG_FEXCEPT_T +typedef unsigned short int fexcept_t; +extern int fegetexceptflag(int, int); // c-warning {{incompatible redeclaration of library function 'fegetexceptflag'}} + // c-note@-1 {{'fegetexceptflag' is a builtin with type 'int (fexcept_t *, int)' (aka 'int (unsigned short *, int)')}} +#elif RIGHT_FEXCEPT_T +// c-no-diagnostics +typedef unsigned short int fexcept_t; +extern int fegetexceptflag(unsigned short int *, int); // OK, right type. +#elif ONLY_FEXCEPT_T +typedef long *fexcept_t; +#endif + +void use(void) { + #pragma STDC FENV_ACCESS ON + fegetexceptflag(0, 0); + #if NO_FEGETEXCEPTFLAG + // cxx-error@-2 {{undeclared identifier 'fegetexceptflag'}} + // c-error@-3 {{call to undeclared function 'fegetexceptflag'; ISO C99 and later do not support implicit function declarations}} + // c-warning@-4 {{declaration of built-in function 'fegetexceptflag' requires inclusion of the header }} + #elif ONLY_FEXCEPT_T + // cxx-error@-6 {{undeclared identifier 'fegetexceptflag'}} + // c-error@-7 {{call to undeclared library function 'fegetexceptflag' with type 'int (fexcept_t *, int)' (aka 'int (long **, int)'); ISO C99 and later do not support implicit function declarations}} + // c-note@-8 {{include the header or explicitly provide a declaration for 'fegetexceptflag'}} + #else + // cxx-no-diagnostics + #endif + + #ifdef NO_FEGETEXCEPTFLAG + // In this case, the regular AST dump doesn't dump the implicit declaration of 'fegetexceptflag'. + #pragma clang __debug dump fegetexceptflag + #endif +} + +// CHECK1: FunctionDecl {{.*}} used fegetexceptflag +// CHECK2: BuiltinAttr {{.*}} Implicit + + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/Sema/fenv-access-implicit.c b/clang/test/Sema/fenv-access-implicit.c new file mode 100644 index 0000000000000..0c4bd6b0eb855 --- /dev/null +++ b/clang/test/Sema/fenv-access-implicit.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +#define FE_INVALID 1 + +void test_fenv_access_undeclared(void) { + #pragma STDC FENV_ACCESS ON + feclearexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'feclearexcept'}} \ + expected-error {{call to undeclared library function 'feclearexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetexceptflag(flagp, FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fegetexceptflag'}} \ + expected-error {{call to undeclared library function 'fegetexceptflag' with type 'int (fexcept_t *, int)' (aka 'int (unsigned short *, int)'); ISO C99 and later do not support implicit function declarations}} + feraiseexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'feraiseexcept'}} \ + expected-error {{call to undeclared library function 'feraiseexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fesetexceptflag(flagp, FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fesetexceptflag'}} \ + expected-error {{call to undeclared library function 'fesetexceptflag' with type 'int (const fexcept_t *, int)' (aka 'int (const unsigned short *, int)'); ISO C99 and later do not support implicit function declarations}} + fetestexcept(FE_INVALID); // expected-note {{include the header or explicitly provide a declaration for 'fetestexcept'}} \ + expected-error {{call to undeclared library function 'fetestexcept' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetround(); // expected-note {{include the header or explicitly provide a declaration for 'fegetround'}} \ + expected-error {{call to undeclared library function 'fegetround' with type 'int (void)'; ISO C99 and later do not support implicit function declarations}} + fesetround(0); // expected-note {{include the header or explicitly provide a declaration for 'fesetround'}} \ + expected-error {{call to undeclared library function 'fesetround' with type 'int (int)'; ISO C99 and later do not support implicit function declarations}} + fegetenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'fegetenv'}} \ + expected-error {{call to undeclared library function 'fegetenv' with type 'int (fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + feholdexcept(envp); // expected-note {{include the header or explicitly provide a declaration for 'feholdexcept'}} \ + expected-error {{call to undeclared library function 'feholdexcept' with type 'int (fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + fesetenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'fesetenv'}} \ + expected-error {{call to undeclared library function 'fesetenv' with type 'int (const fenv_t *)'; ISO C99 and later do not support implicit function declarations}} + feupdateenv(envp); // expected-note {{include the header or explicitly provide a declaration for 'feupdateenv'}} \ + expected-error {{call to undeclared library function 'feupdateenv' with type 'int (const fenv_t *)'; ISO C99 and later do not support implicit function declarations}} +} diff --git a/clang/test/Sema/fenv-access-unevaluated.cpp b/clang/test/Sema/fenv-access-unevaluated.cpp new file mode 100644 index 0000000000000..14752ba3c377e --- /dev/null +++ b/clang/test/Sema/fenv-access-unevaluated.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); + +// expected-no-diagnostics +void test_fenv_access_unevaluated() { + decltype(::feclearexcept) a; + decltype(::fegetexceptflag) b; + decltype(::feraiseexcept) c; + decltype(::fesetexceptflag) d; + decltype(::fetestexcept) e; + decltype(::fegetround) f; + decltype(::fesetround) g; + decltype(::fegetenv) h; + decltype(::feholdexcept) i; + decltype(::fesetenv) j; + decltype(::feupdateenv) k; +} diff --git a/clang/test/Sema/fenv-access.c b/clang/test/Sema/fenv-access.c new file mode 100644 index 0000000000000..3a7b95af7ab4c --- /dev/null +++ b/clang/test/Sema/fenv-access.c @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -verify -Wfenv-access %s +// RUN: %clang_cc1 -verify -Wfenv-access -ffp-exception-behavior=maytrap -DNO_WARN %s +// RUN: %clang_cc1 -verify -Wfenv-access -ffp-exception-behavior=strict -DNO_WARN %s + +typedef struct {} fenv_t; +typedef unsigned short int fexcept_t; + +int feclearexcept(int excepts); +int fegetexceptflag(fexcept_t *flagp, int excepts); +int feraiseexcept(int excepts); +int fesetexceptflag(const fexcept_t *flagp, int excepts); +int fetestexcept(int excepts); +int fegetround(void); +int fesetround(int rounding_mode); +int fegetenv(fenv_t *envp); +int feholdexcept(fenv_t *envp); +int fesetenv(const fenv_t *envp); +int feupdateenv(const fenv_t *envp); + +#define FE_INVALID 1 + +fexcept_t *flagp = 0; +fenv_t *envp = 0; + +void test_fenv_access_off(void) { +#ifdef NO_WARN + // expected-no-diagnostics + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +#else + feclearexcept(FE_INVALID); // expected-warning {{'feclearexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetexceptflag(flagp, FE_INVALID); // expected-warning {{'fegetexceptflag' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feraiseexcept(FE_INVALID); // expected-warning {{'feraiseexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetexceptflag(flagp, FE_INVALID); // expected-warning {{'fesetexceptflag' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fetestexcept(FE_INVALID); // expected-warning {{'fetestexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetround(); // expected-warning {{'fegetround' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetround(0); // expected-warning {{'fesetround' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fegetenv(envp); // expected-warning {{'fegetenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feholdexcept(envp); // expected-warning {{'feholdexcept' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + fesetenv(envp); // expected-warning {{'fesetenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} + feupdateenv(envp); // expected-warning {{'feupdateenv' used without enabling floating-point exception behavior; use 'pragma STDC FENV_ACCESS ON' or compile with '-ffp-exception-behavior=maytrap'}} +#endif +} + +void test_fenv_access_on(void) { + #pragma STDC FENV_ACCESS ON + fesetround(0); + feclearexcept(FE_INVALID); + fegetexceptflag(flagp, FE_INVALID); + feraiseexcept(FE_INVALID); + fesetexceptflag(flagp, FE_INVALID); + fetestexcept(FE_INVALID); + fegetround(); + fesetround(0); + fegetenv(envp); + feholdexcept(envp); + fesetenv(envp); + feupdateenv(envp); +} diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp index c2e38c0d6aeb8..2394c3b299c81 100644 --- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp +++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp @@ -376,6 +376,8 @@ class PrototypeParser { .Case("uint64_t", "UWi") .Case("void", "v") .Case("wchar_t", "w") + .Case("fexcept_t", "Tx") + .Case("fenv_t", "Te") .Case("...", ".") .Default("error"); if (ReturnTypeVal == "error") diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp index 7ea6134b702bf..1cc64a38aaed8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp index 02833888747ac..e6e400bb1fef7 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp @@ -174,11 +174,11 @@ class StackDepotBenchmark // '--gtest_filter=*Benchmark*' TEST_P(StackDepotBenchmark, DISABLED_Benchmark) { auto Param = GetParam(); - std::atomic here = {}; + std::atomic here = {}; auto thread = [&](int idx) { here++; - while (here < Param.UniqueThreads) std::this_thread::yield(); + while (here < Param.Threads) std::this_thread::yield(); std::vector frames(64); for (int r = 0; r < Param.RepeatPerThread; ++r) { diff --git a/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp b/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp index feef8f81d70da..7c143701af6a3 100644 --- a/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/coverage-module-unloaded.cpp @@ -1,8 +1,8 @@ // Check that unloading a module doesn't break coverage dumping for remaining // modules. // RUN: mkdir -p %t.dir && cd %t.dir -// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib1 -fPIC -// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED %s -shared -o %dynamiclib2 -fPIC +// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED_LIB %s -shared -o %dynamiclib1 -fPIC +// RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard -DSHARED_LIB %s -shared -o %dynamiclib2 -fPIC // RUN: %clangxx_asan -fsanitize-coverage=func,trace-pc-guard %s %libdl -o %t.dir/exe // RUN: mkdir -p %t.tmp/coverage-module-unloaded && cd %t.tmp/coverage-module-unloaded // RUN: %env_asan_opts=coverage=1:verbosity=1 %run %t.dir/exe %dynamiclib1 %dynamiclib2 2>&1 | FileCheck %s @@ -18,7 +18,7 @@ #include #include -#ifdef SHARED +#ifdef SHARED_LIB extern "C" { void bar() { printf("bar\n"); } } diff --git a/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp b/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp index 662625e16f3e1..bfabf40fdaa86 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/get_module_and_offset_for_pc.cpp @@ -1,5 +1,5 @@ // RUN: mkdir -p %t-dir -// RUN: %clangxx -DSHARED %s -shared -o %t-dir/get_module_and_offset_for_pc.so -fPIC +// RUN: %clangxx -DSHARED_LIB %s -shared -o %t-dir/get_module_and_offset_for_pc.so -fPIC // RUN: %clangxx -DSO_DIR=\"%t-dir\" -O0 %s -o %t // RUN: %run %t 2>&1 | FileCheck %s @@ -13,7 +13,7 @@ #include #include -#ifdef SHARED +#ifdef SHARED_LIB extern "C" { int foo() { return 1; } } diff --git a/llvm/docs/AliasAnalysis.rst b/llvm/docs/AliasAnalysis.rst index af6da8cf64ea3..9b05c9d7be138 100644 --- a/llvm/docs/AliasAnalysis.rst +++ b/llvm/docs/AliasAnalysis.rst @@ -638,16 +638,16 @@ implementations. You can use them with commands like: % opt -ds-aa -aa-eval foo.bc -disable-output -stats -The ``-print-alias-sets`` pass +The ``print`` pass ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``-print-alias-sets`` pass is exposed as part of the ``opt`` tool to print +The ``print`` pass is exposed as part of the ``opt`` tool to print out the Alias Sets formed by the `AliasSetTracker`_ class. This is useful if you're using the ``AliasSetTracker`` class. To use it, use something like: .. code-block:: bash - % opt -ds-aa -print-alias-sets -disable-output + % opt -passes='print' -disable-output The ``-aa-eval`` pass ^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst index 882ec2a71577e..66a682983d9d5 100644 --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -220,8 +220,8 @@ This pass decodes the debug info metadata in a module and prints it to standard This pass is a simple post-dominator construction algorithm for finding post-dominators. -``print-alias-sets``: Alias Set Printer ---------------------------------------- +``print``: Alias Set Printer +---------------------------------------- Yet to be written. @@ -237,8 +237,8 @@ in a human-readable form. This pass, only available in ``opt``, prints the SCCs of the call graph to standard error in a human-readable form. -``print-cfg-sccs``: Print SCCs of each function CFG ---------------------------------------------------- +``print``: Print SCCs of each function CFG +---------------------------------------------------- This pass, only available in ``opt``, prints the SCCs of each function CFG to standard error in a human-readable form. diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index fffd696e59baf..12b7ad458e93e 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -148,6 +148,11 @@ Makes programs 10x faster by doing Special New Thing. disabled by default to maintain compatibility with Binutils and LLVM older toolchains that do not define the `R_AARCH64_TLS_DTPREL64` static relocation type for TLS offsets. +* A bug was fixed that caused LLVM IR inline assembly clobbers of the x29 and + x30 registers to be ignored when they were written using their xN names + instead of the ABI names FP and LR. Note that LLVM IR produced by Clang + always uses the ABI names, but other frontends may not. + ([#167783](https://github.com/llvm/llvm-project/pull/167783)) ### Changes to the AMDGPU Backend diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index a01a67f136710..5182341fa7a89 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -155,7 +155,6 @@ struct ForcePassLinking { llvm::AliasAnalysis AA(TLI); llvm::BatchAAResults BAA(AA); llvm::AliasSetTracker X(BAA); - X.add(llvm::MemoryLocation()); // for -print-alias-sets (void)llvm::AreStatisticsEnabled(); (void)llvm::sys::RunningOnValgrind(); } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 3328bc0fe836f..0b5a6d8223102 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -507,12 +507,11 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("place-safepoints", PlaceSafepointsPass()) FUNCTION_PASS("print", PrintFunctionPass(errs())) -// TODO: rename to print after NPM switch -FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(errs())) -FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(errs())) -FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(errs())) -FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(errs())) -FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(errs())) +FUNCTION_PASS("print", AliasSetsPrinterPass(errs())) +FUNCTION_PASS("print", CFGSCCPrinterPass(errs())) +FUNCTION_PASS("print", MemDerefPrinterPass(errs())) +FUNCTION_PASS("print", MustExecutePrinterPass(errs())) +FUNCTION_PASS("print", PredicateInfoPrinterPass(errs())) FUNCTION_PASS("print", AssumptionPrinterPass(errs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(errs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(errs())) diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index a8fc28944af34..3ef6e693a4076 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -155,7 +155,7 @@ void initializeAArch64DAGToDAGISelLegacyPass(PassRegistry &); void initializeAArch64DeadRegisterDefinitionsLegacyPass(PassRegistry &); void initializeAArch64ExpandPseudoLegacyPass(PassRegistry &); void initializeAArch64LoadStoreOptLegacyPass(PassRegistry &); -void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); +void initializeAArch64LowerHomogeneousPrologEpilogLegacyPass(PassRegistry &); void initializeAArch64CodeLayoutOptPass(PassRegistry &); void initializeAArch64MIPeepholeOptLegacyPass(PassRegistry &); void initializeAArch64O0PreLegalizerCombinerLegacyPass(PassRegistry &); @@ -317,6 +317,12 @@ class AArch64ConditionalComparesPass MachineFunctionAnalysisManager &MFAM); }; +class AArch64LowerHomogeneousPrologEpilogPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0eac7bc11a777..3016e1b94c22d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/SmallVectorExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -13698,6 +13699,20 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(unsigned(AArch64::ZT0), &AArch64::ZTRRegClass); } + // Clang will correctly decode the usage of register name aliases into their + // official names. However, other frontends like `rustc` do not. The + // conversion below allows users of these frontends to use the ABI names for + // registers in LLVM-style register constraints. + // + // x31->sp is not included here because it's not a general register and + // needs different handling + unsigned XRegFromAlias = StringSwitch(Constraint.lower()) + .Cases({"{x29}", "{fp}"}, AArch64::FP) + .Cases({"{x30}", "{lr}"}, AArch64::LR) + .Default(AArch64::NoRegister); + if (XRegFromAlias != AArch64::NoRegister) + return std::make_pair(XRegFromAlias, &AArch64::GPR64RegClass); + // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp index d69f12e7c0a7c..23fef847a75f1 100644 --- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp +++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp @@ -10,11 +10,13 @@ // //===----------------------------------------------------------------------===// +#include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64InstPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -23,6 +25,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include #include @@ -39,11 +42,11 @@ static cl::opt FrameHelperSizeThreshold( namespace { -class AArch64LowerHomogeneousPE { +class AArch64LowerHomogeneousPrologEpilogImpl { public: const AArch64InstrInfo *TII; - AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) + AArch64LowerHomogeneousPrologEpilogImpl(Module *M, MachineModuleInfo *MMI) : M(M), MMI(MMI) {} bool run(); @@ -69,11 +72,11 @@ class AArch64LowerHomogeneousPE { MachineBasicBlock::iterator &NextMBBI); }; -class AArch64LowerHomogeneousPrologEpilog : public ModulePass { +class AArch64LowerHomogeneousPrologEpilogLegacy : public ModulePass { public: static char ID; - AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {} + AArch64LowerHomogeneousPrologEpilogLegacy() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); @@ -89,22 +92,34 @@ class AArch64LowerHomogeneousPrologEpilog : public ModulePass { } // end anonymous namespace -char AArch64LowerHomogeneousPrologEpilog::ID = 0; +char AArch64LowerHomogeneousPrologEpilogLegacy::ID = 0; -INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, +INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilogLegacy, "aarch64-lower-homogeneous-prolog-epilog", AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) -bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { +bool AArch64LowerHomogeneousPrologEpilogLegacy::runOnModule(Module &M) { if (skipModule(M)) return false; MachineModuleInfo *MMI = &getAnalysis().getMMI(); - return AArch64LowerHomogeneousPE(&M, MMI).run(); + return AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run(); } -bool AArch64LowerHomogeneousPE::run() { +PreservedAnalyses +AArch64LowerHomogeneousPrologEpilogPass::run(Module &M, + ModuleAnalysisManager &MAM) { + MachineModuleInfo *MMI = &MAM.getResult(M).getMMI(); + bool Changed = AArch64LowerHomogeneousPrologEpilogImpl(&M, MMI).run(); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +bool AArch64LowerHomogeneousPrologEpilogImpl::run() { bool Changed = false; for (auto &F : *M) { if (F.empty()) @@ -455,7 +470,7 @@ static bool shouldUseFrameHelper(MachineBasicBlock &MBB, /// ldp x29, x30, [sp, #32] /// ldp x20, x19, [sp, #16] /// ldp x22, x21, [sp], #48 -bool AArch64LowerHomogeneousPE::lowerEpilog( +bool AArch64LowerHomogeneousPrologEpilogImpl::lowerEpilog( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { auto &MF = *MBB.getParent(); @@ -545,7 +560,7 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( /// stp x22, x21, [sp, #-48]! /// stp x20, x19, [sp, #16] /// stp x29, x30, [sp, #32] -bool AArch64LowerHomogeneousPE::lowerProlog( +bool AArch64LowerHomogeneousPrologEpilogImpl::lowerProlog( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { auto &MF = *MBB.getParent(); @@ -626,9 +641,9 @@ bool AArch64LowerHomogeneousPE::lowerProlog( /// @param MBBI current instruction iterator /// @param NextMBBI next instruction iterator which can be updated /// @return True when IR is changed. -bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMI( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); switch (Opcode) { @@ -642,7 +657,7 @@ bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, return false; } -bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMBB(MachineBasicBlock &MBB) { bool Modified = false; MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -655,7 +670,8 @@ bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { return Modified; } -bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { +bool AArch64LowerHomogeneousPrologEpilogImpl::runOnMachineFunction( + MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); bool Modified = false; @@ -665,5 +681,5 @@ bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { } ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { - return new AArch64LowerHomogeneousPrologEpilog(); + return new AArch64LowerHomogeneousPrologEpilogLegacy(); } diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def index de1b5834112e7..e3e47b12a9ec3 100644 --- a/llvm/lib/Target/AArch64/AArch64PassRegistry.def +++ b/llvm/lib/Target/AArch64/AArch64PassRegistry.def @@ -16,6 +16,7 @@ #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("aarch64-lower-homogeneous-prolog-epilog", AArch64LowerHomogeneousPrologEpilogPass()) #undef MODULE_PASS #ifndef FUNCTION_PASS diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 57929316954d6..b8cc69f7569d6 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -277,7 +277,7 @@ LLVMInitializeAArch64Target() { initializeAArch64SLSHardeningLegacyPass(PR); initializeAArch64StackTaggingPass(PR); initializeAArch64StackTaggingPreRALegacyPass(PR); - initializeAArch64LowerHomogeneousPrologEpilogPass(PR); + initializeAArch64LowerHomogeneousPrologEpilogLegacyPass(PR); initializeAArch64DAGToDAGISelLegacyPass(PR); initializeAArch64CondBrTuningPass(PR); initializeAArch64Arm64ECCallLoweringPass(PR); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3dc9f503088e6..c91143fefb6af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20961,7 +20961,7 @@ std::pair X86TargetLowering::BuildFILD( /// Horizontal vector math instructions may be slower than normal math with /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch /// implementation, and likely shuffle complexity of the alternate sequence. -static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, +static bool shouldUseHorizontalOp(bool IsSingleSource, const SelectionDAG &DAG, const X86Subtarget &Subtarget) { bool IsOptimizingSize = DAG.shouldOptForSize(); bool HasFastHOps = Subtarget.hasFastHorizontalOps(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a064aa1016399..15e4668671807 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3653,7 +3653,7 @@ static Instruction *foldNestedSelects(SelectInst &OuterSelVal, /// already poison. For example, if ValAssumedPoison is `icmp samesign X, 10` /// and V is `icmp ne X, 5`, impliesPoisonOrCond returns true. static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V, - bool Expected) { + bool Expected, const SimplifyQuery &SQ) { if (impliesPoison(ValAssumedPoison, V)) return true; @@ -3678,6 +3678,14 @@ static bool impliesPoisonOrCond(const Value *ValAssumedPoison, const Value *V, *RHSC2); } } + Value *A; + if (match(ValAssumedPoison, m_NUWTrunc(m_Value(A))) && + isGuaranteedNotToBePoison(A)) { + assert(ValAssumedPoison->getType()->isIntOrIntVectorTy(1)); + return computeKnownBits( + A, SQ.getWithInstruction(cast(ValAssumedPoison))) + .getMaxValue() == 1; + } return false; } @@ -3703,13 +3711,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { // checks whether folding it does not convert a well-defined value into // poison. if (match(TrueVal, m_One())) { - if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false)) { + if (impliesPoisonOrCond(FalseVal, CondVal, /*Expected=*/false, SQ)) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) && - impliesPoisonOrCond(FalseVal, B, /*Expected=*/false)) { + impliesPoisonOrCond(FalseVal, B, /*Expected=*/false, SQ)) { // (A || B) || C --> A || (B | C) Value *LOr = Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal)); if (auto *I = dyn_cast(LOr)) { @@ -3749,13 +3757,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { } if (match(FalseVal, m_Zero())) { - if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true)) { + if (impliesPoisonOrCond(TrueVal, CondVal, /*Expected=*/true, SQ)) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) && - impliesPoisonOrCond(TrueVal, B, /*Expected=*/true)) { + impliesPoisonOrCond(TrueVal, B, /*Expected=*/true, SQ)) { // (A && B) && C --> A && (B & C) Value *LAnd = Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal)); if (auto *I = dyn_cast(LAnd)) { diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index f3cdb3518ddcb..9fe751a56df64 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -641,9 +641,9 @@ Instruction *IndirectCallPromoter::computeVTableInfos( continue; auto &Candidate = Candidates[CalleeIndexIter->second]; - // There shouldn't be duplicate GUIDs in one !prof metadata (except - // duplicated zeros), so assign counters directly won't cause overwrite or - // counter loss. + // There should never be duplicate GUIDs in one !prof metdata, as this is + // an IR invariant enforced by the verifier. Assigning counters directly + // won't cause overwrite or counter loss. Candidate.VTableGUIDAndCounts[VTableVal] = V.Count; Candidate.AddressPoints.push_back( getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp index da581e41dc9a8..d5696bc9ea956 100644 --- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp +++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp @@ -158,10 +158,9 @@ expandToSwitch(CallBase *CB, const JumpTableTy &JT, DomTreeUpdater &DTU, for (const auto &[G, C] : Targets) { [[maybe_unused]] auto It = GuidToCounter.insert({G, C}); - // TODO(boomanaiden154): Currently we do not assert on inserting - // duplicate GUIDs because we might have multiple zeros when the profile - // loader fails to map addresses to functions. Readd the assertion that - // we did insert once this has been fixed. + // We should always be inserting as it is verifier-enforced IR invariant + // that VP metadata does not have duplicate values. + assert(It.second); } } for (auto [Index, Func] : llvm::enumerate(JT.Funcs)) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp index c6997a6c8e717..93d06172ae75f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp @@ -623,6 +623,96 @@ void VFSelectionContext::collectInLoopReductions() { } } +bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, + const VectorizationFactor &B, + const unsigned MaxTripCount, + bool HasTail, + bool IsEpilogue) const { + InstructionCost CostA = A.Cost; + InstructionCost CostB = B.Cost; + + // When there is a hint to always prefer scalable vectors, honour that hint. + if (Hints.isScalableVectorizationAlwaysPreferred()) + if (A.Width.isScalable() && CostA.isValid() && !B.Width.isScalable() && + !B.Width.isScalar()) + return true; + + // Improve estimate for the vector width if it is scalable. + unsigned EstimatedWidthA = A.Width.getKnownMinValue(); + unsigned EstimatedWidthB = B.Width.getKnownMinValue(); + if (std::optional VScale = Config.getVScaleForTuning()) { + if (A.Width.isScalable()) + EstimatedWidthA *= *VScale; + if (B.Width.isScalable()) + EstimatedWidthB *= *VScale; + } + + // When optimizing for size choose whichever is smallest, which will be the + // one with the smallest cost for the whole loop. On a tie pick the larger + // vector width, on the assumption that throughput will be greater. + if (Config.CostKind == TTI::TCK_CodeSize) + return CostA < CostB || + (CostA == CostB && EstimatedWidthA > EstimatedWidthB); + + // Assume vscale may be larger than 1 (or the value being tuned for), + // so that scalable vectorization is slightly favorable over fixed-width + // vectorization. + bool PreferScalable = !TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) && + A.Width.isScalable() && !B.Width.isScalable(); + + auto CmpFn = [PreferScalable](const InstructionCost &LHS, + const InstructionCost &RHS) { + return PreferScalable ? LHS <= RHS : LHS < RHS; + }; + + // To avoid the need for FP division: + // (CostA / EstimatedWidthA) < (CostB / EstimatedWidthB) + // <=> (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA) + bool LowerCostWithoutTC = + CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA); + if (!MaxTripCount) + return LowerCostWithoutTC; + + auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF, + InstructionCost VectorCost, + InstructionCost ScalarCost) { + // If the trip count is a known (possibly small) constant, the trip count + // will be rounded up to an integer number of iterations under + // FoldTailByMasking. The total cost in that case will be + // VecCost*ceil(TripCount/VF). When not folding the tail, the total + // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be + // some extra overheads, but for the purpose of comparing the costs of + // different VFs we can use this to compare the total loop-body cost + // expected after vectorization. + if (HasTail) + return VectorCost * (MaxTripCount / VF) + + ScalarCost * (MaxTripCount % VF); + return VectorCost * divideCeil(MaxTripCount, VF); + }; + + auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost); + auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost); + bool LowerCostWithTC = CmpFn(RTCostA, RTCostB); + LLVM_DEBUG(if (LowerCostWithTC != LowerCostWithoutTC) { + dbgs() << "LV: VF " << (LowerCostWithTC ? A.Width : B.Width) + << " has lower cost than VF " + << (LowerCostWithTC ? B.Width : A.Width) + << " when taking the cost of the remaining scalar loop iterations " + "into consideration for a maximum trip count of " + << MaxTripCount << ".\n"; + }); + return LowerCostWithTC; +} + +bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, + const VectorizationFactor &B, + bool HasTail, + bool IsEpilogue) const { + const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount(); + return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount, HasTail, + IsEpilogue); +} + // TODO: we could return a pair of values that specify the max VF and // min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of // `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1de70dfd09a83..4f9ab60f1c526 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3091,96 +3091,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return FixedScalableVFPair::getNone(); } -bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, - const VectorizationFactor &B, - const unsigned MaxTripCount, - bool HasTail, - bool IsEpilogue) const { - InstructionCost CostA = A.Cost; - InstructionCost CostB = B.Cost; - - // When there is a hint to always prefer scalable vectors, honour that hint. - if (Hints.isScalableVectorizationAlwaysPreferred()) - if (A.Width.isScalable() && CostA.isValid() && !B.Width.isScalable() && - !B.Width.isScalar()) - return true; - - // Improve estimate for the vector width if it is scalable. - unsigned EstimatedWidthA = A.Width.getKnownMinValue(); - unsigned EstimatedWidthB = B.Width.getKnownMinValue(); - if (std::optional VScale = Config.getVScaleForTuning()) { - if (A.Width.isScalable()) - EstimatedWidthA *= *VScale; - if (B.Width.isScalable()) - EstimatedWidthB *= *VScale; - } - - // When optimizing for size choose whichever is smallest, which will be the - // one with the smallest cost for the whole loop. On a tie pick the larger - // vector width, on the assumption that throughput will be greater. - if (Config.CostKind == TTI::TCK_CodeSize) - return CostA < CostB || - (CostA == CostB && EstimatedWidthA > EstimatedWidthB); - - // Assume vscale may be larger than 1 (or the value being tuned for), - // so that scalable vectorization is slightly favorable over fixed-width - // vectorization. - bool PreferScalable = !TTI.preferFixedOverScalableIfEqualCost(IsEpilogue) && - A.Width.isScalable() && !B.Width.isScalable(); - - auto CmpFn = [PreferScalable](const InstructionCost &LHS, - const InstructionCost &RHS) { - return PreferScalable ? LHS <= RHS : LHS < RHS; - }; - - // To avoid the need for FP division: - // (CostA / EstimatedWidthA) < (CostB / EstimatedWidthB) - // <=> (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA) - bool LowerCostWithoutTC = - CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA); - if (!MaxTripCount) - return LowerCostWithoutTC; - - auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF, - InstructionCost VectorCost, - InstructionCost ScalarCost) { - // If the trip count is a known (possibly small) constant, the trip count - // will be rounded up to an integer number of iterations under - // FoldTailByMasking. The total cost in that case will be - // VecCost*ceil(TripCount/VF). When not folding the tail, the total - // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be - // some extra overheads, but for the purpose of comparing the costs of - // different VFs we can use this to compare the total loop-body cost - // expected after vectorization. - if (HasTail) - return VectorCost * (MaxTripCount / VF) + - ScalarCost * (MaxTripCount % VF); - return VectorCost * divideCeil(MaxTripCount, VF); - }; - - auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost); - auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost); - bool LowerCostWithTC = CmpFn(RTCostA, RTCostB); - LLVM_DEBUG(if (LowerCostWithTC != LowerCostWithoutTC) { - dbgs() << "LV: VF " << (LowerCostWithTC ? A.Width : B.Width) - << " has lower cost than VF " - << (LowerCostWithTC ? B.Width : A.Width) - << " when taking the cost of the remaining scalar loop iterations " - "into consideration for a maximum trip count of " - << MaxTripCount << ".\n"; - }); - return LowerCostWithTC; -} - -bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A, - const VectorizationFactor &B, - bool HasTail, - bool IsEpilogue) const { - const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount(); - return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount, HasTail, - IsEpilogue); -} - void LoopVectorizationPlanner::emitInvalidCostRemarks( OptimizationRemarkEmitter *ORE) { using RecipeVFPair = std::pair; @@ -7604,7 +7514,7 @@ preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { /// preheader of the vector epilogue loop, after created by the execution of \p /// Plan. static SmallVector preparePlanForEpilogueVectorLoop( - VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, + VPlan &MainPlan, VPlan &Plan, Loop *L, const SCEV2ValueTy &ExpandedSCEVs, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel &CM, VFSelectionContext &Config, ScalarEvolution &SE) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); @@ -7614,28 +7524,30 @@ static SmallVector preparePlanForEpilogueVectorLoop( VPValue *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to start // at the resume value from the main vector loop. Find the resume value - // created during execution of the main VPlan. It must be the first phi in the - // loop preheader. Add this resume value as an offset to the canonical IV of - // the epilogue loop. + // created during execution of the main VPlan. Add this resume value as an + // offset to the canonical IV of the epilogue loop. using namespace llvm::PatternMatch; - PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin(); - for (Value *Inc : EPResumeVal->incoming_values()) { - if (match(Inc, m_SpecificInt(0))) - continue; - assert(!EPI.VectorTripCount && - "Must only have a single non-zero incoming value"); - EPI.VectorTripCount = Inc; - } - // If we didn't find a non-zero vector trip count, all incoming values - // must be zero, which also means the vector trip count is zero. Pick the - // first zero as vector trip count. - // TODO: We should not choose VF * UF so the main vector loop is known to - // be dead. - if (!EPI.VectorTripCount) { - assert(EPResumeVal->getNumIncomingValues() > 0 && - all_of(EPResumeVal->incoming_values(), match_fn(m_SpecificInt(0))) && - "all incoming values must be 0"); - EPI.VectorTripCount = EPResumeVal->getOperand(0); + VPInstruction *ResumeForEpilogue = + cast(&*MainPlan.getScalarPreheader()->getFirstNonPhi()); + Value *EPResumeVal = ResumeForEpilogue->getUnderlyingValue(); + if (auto *ResumePhi = dyn_cast(EPResumeVal)) { + for (Value *Inc : ResumePhi->incoming_values()) { + if (match(Inc, m_SpecificInt(0))) + continue; + assert(!EPI.VectorTripCount && + "Must only have a single non-zero incoming value"); + EPI.VectorTripCount = Inc; + } + // If we didn't find a non-zero vector trip count, all incoming values + // must be zero, which also means the vector trip count is zero. + if (!EPI.VectorTripCount) { + assert(ResumePhi->getNumIncomingValues() > 0 && + all_of(ResumePhi->incoming_values(), match_fn(m_SpecificInt(0))) && + "all incoming values must be 0"); + EPI.VectorTripCount = ResumePhi->getIncomingValue(0); + } + } else { + EPI.VectorTripCount = EPResumeVal; } VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); assert(all_of(IV->users(), @@ -7869,40 +7781,30 @@ static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L, assert(EPI.MainLoopIterationCountCheck && EPI.EpilogueIterationCountCheck && "expected this to be saved from the previous pass."); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, VecEpiloguePreHeader); - DTU.applyUpdates({{DominatorTree::Delete, EPI.MainLoopIterationCountCheck, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, EPI.MainLoopIterationCountCheck, - VecEpiloguePreHeader}}); + // Helper to redirect an edge from \p BB to \p VecEpilogueIterationCountCheck + // to \p NewSucc instead, updating the DomTree. + auto RedirectEdge = [&](BasicBlock *BB, BasicBlock *NewSucc) { + BB->getTerminator()->replaceUsesOfWith(VecEpilogueIterationCountCheck, + NewSucc); + DTU.applyUpdates( + {{DominatorTree::Delete, BB, VecEpilogueIterationCountCheck}, + {DominatorTree::Insert, BB, NewSucc}}); + }; + + RedirectEdge(EPI.MainLoopIterationCountCheck, VecEpiloguePreHeader); BasicBlock *ScalarPH = cast(EpiPlan.getScalarPreheader())->getIRBasicBlock(); - EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates( - {{DominatorTree::Delete, EPI.EpilogueIterationCountCheck, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, EPI.EpilogueIterationCountCheck, ScalarPH}}); + RedirectEdge(EPI.EpilogueIterationCountCheck, ScalarPH); // Adjust the terminators of runtime check blocks and phis using them. BasicBlock *SCEVCheckBlock = Checks.getSCEVChecks().second; BasicBlock *MemCheckBlock = Checks.getMemRuntimeChecks().second; - if (SCEVCheckBlock) { - SCEVCheckBlock->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates({{DominatorTree::Delete, SCEVCheckBlock, - VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, SCEVCheckBlock, ScalarPH}}); - } - if (MemCheckBlock) { - MemCheckBlock->getTerminator()->replaceUsesOfWith( - VecEpilogueIterationCountCheck, ScalarPH); - DTU.applyUpdates( - {{DominatorTree::Delete, MemCheckBlock, VecEpilogueIterationCountCheck}, - {DominatorTree::Insert, MemCheckBlock, ScalarPH}}); - } + if (SCEVCheckBlock) + RedirectEdge(SCEVCheckBlock, ScalarPH); + if (MemCheckBlock) + RedirectEdge(MemCheckBlock, ScalarPH); // The vec.epilog.iter.check block may contain Phi nodes from inductions // or reductions which merge control-flow from the latch block and the @@ -7925,11 +7827,11 @@ static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L, return EPI.EpilogueIterationCountCheck == IncB; })) continue; - Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck); - if (SCEVCheckBlock) - Phi->removeIncomingValue(SCEVCheckBlock); - if (MemCheckBlock) - Phi->removeIncomingValue(MemCheckBlock); + for (BasicBlock *BB : + {EPI.EpilogueIterationCountCheck, SCEVCheckBlock, MemCheckBlock}) { + if (BB) + Phi->removeIncomingValue(BB); + } } auto IP = VecEpiloguePreHeader->getFirstNonPHIIt(); @@ -8381,7 +8283,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, Checks, BestEpiPlan); SmallVector InstsToMove = preparePlanForEpilogueVectorLoop( - BestEpiPlan, L, ExpandedSCEVs, EPI, CM, Config, *PSE.getSE()); + BestMainPlan, BestEpiPlan, L, ExpandedSCEVs, EPI, CM, Config, + *PSE.getSE()); LVP.attachRuntimeChecks(BestEpiPlan, Checks, HasBranchWeights); LVP.executePlan( EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, diff --git a/llvm/test/Analysis/AliasSet/argmemonly.ll b/llvm/test/Analysis/AliasSet/argmemonly.ll index 995fb26ca436a..6912447491149 100644 --- a/llvm/test/Analysis/AliasSet/argmemonly.ll +++ b/llvm/test/Analysis/AliasSet/argmemonly.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/guards.ll b/llvm/test/Analysis/AliasSet/guards.ll index 9ca70f6244800..32f74a41773d6 100644 --- a/llvm/test/Analysis/AliasSet/guards.ll +++ b/llvm/test/Analysis/AliasSet/guards.ll @@ -1,4 +1,4 @@ -; RUN: opt -aa-pipeline=basic-aa -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='print' -S -o - < %s 2>&1 | FileCheck %s declare void @llvm.experimental.guard(i1, ...) ; CHECK: Alias sets for function 'test0': diff --git a/llvm/test/Analysis/AliasSet/intrinsics.ll b/llvm/test/Analysis/AliasSet/intrinsics.ll index 0dc802ca7e0aa..a0a319ae3e683 100644 --- a/llvm/test/Analysis/AliasSet/intrinsics.ll +++ b/llvm/test/Analysis/AliasSet/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions" +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions" ; CHECK: Alias sets for function 'test1': ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values. diff --git a/llvm/test/Analysis/AliasSet/memloc-vscale.ll b/llvm/test/Analysis/AliasSet/memloc-vscale.ll index 6b41604637405..acd183818903b 100644 --- a/llvm/test/Analysis/AliasSet/memloc-vscale.ll +++ b/llvm/test/Analysis/AliasSet/memloc-vscale.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes=print-alias-sets 2>&1 | FileCheck %s +; RUN: opt -S < %s -passes='print' 2>&1 | FileCheck %s ; CHECK-LABEL: Alias sets for function 'sn' ; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Memory locations: (ptr %p, LocationSize::precise(vscale x 16)), (ptr %p, LocationSize::precise(8)) diff --git a/llvm/test/Analysis/AliasSet/memset.ll b/llvm/test/Analysis/AliasSet/memset.ll index 17f1e53f77e32..01242fa0171b3 100644 --- a/llvm/test/Analysis/AliasSet/memset.ll +++ b/llvm/test/Analysis/AliasSet/memset.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/memtransfer.ll b/llvm/test/Analysis/AliasSet/memtransfer.ll index 93290c39620eb..b91a14099f68c 100644 --- a/llvm/test/Analysis/AliasSet/memtransfer.ll +++ b/llvm/test/Analysis/AliasSet/memtransfer.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -S -o - < %s 2>&1 | FileCheck %s @s = global i8 1, align 1 @d = global i8 2, align 1 diff --git a/llvm/test/Analysis/AliasSet/saturation.ll b/llvm/test/Analysis/AliasSet/saturation.ll index 27f5bbee2f55c..1d547dcd8f26a 100644 --- a/llvm/test/Analysis/AliasSet/saturation.ll +++ b/llvm/test/Analysis/AliasSet/saturation.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=print-alias-sets -alias-set-saturation-threshold=4 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOSAT -; RUN: opt -passes=print-alias-sets -alias-set-saturation-threshold=3 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT +; RUN: opt -passes='print' -alias-set-saturation-threshold=4 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOSAT +; RUN: opt -passes='print' -alias-set-saturation-threshold=3 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT ; CHECK-LABEL: 'nomerge' ; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(4)) diff --git a/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll b/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll index 37d8e55cb2ff3..ad213c79697ad 100644 --- a/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll +++ b/llvm/test/Analysis/BasicAA/separate_storage-alias-sets.ll @@ -2,7 +2,7 @@ ; separate storage hints. This lets alias analysis users (such as the alias set ; tracker) who can't be context-sensitive still get the benefits of hints. -; RUN: opt < %s -basic-aa-separate-storage -S -passes=print-alias-sets 2>&1 | FileCheck %s +; RUN: opt < %s -basic-aa-separate-storage -S -passes='print' 2>&1 | FileCheck %s declare void @llvm.assume(i1) diff --git a/llvm/test/Analysis/MustExecute/const-cond.ll b/llvm/test/Analysis/MustExecute/const-cond.ll index 97358f670b88c..01553b2ca857d 100644 --- a/llvm/test/Analysis/MustExecute/const-cond.ll +++ b/llvm/test/Analysis/MustExecute/const-cond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; In general the CFG below is easily simplified but this is useful for ; pass ordering issue elimination. diff --git a/llvm/test/Analysis/MustExecute/infinite_loops.ll b/llvm/test/Analysis/MustExecute/infinite_loops.ll index 3fac1293df5d4..844dd944b5d38 100644 --- a/llvm/test/Analysis/MustExecute/infinite_loops.ll +++ b/llvm/test/Analysis/MustExecute/infinite_loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; Infinite loop. ; Make sure that the backedge is mustexec. diff --git a/llvm/test/Analysis/MustExecute/irreducible-cfg.ll b/llvm/test/Analysis/MustExecute/irreducible-cfg.ll index a452761ab3356..5db862995cecd 100644 --- a/llvm/test/Analysis/MustExecute/irreducible-cfg.ll +++ b/llvm/test/Analysis/MustExecute/irreducible-cfg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s ; The loop body has two predecessors, %header and %side-entry. This leads to irreducible-cfg define i64 @baz() { diff --git a/llvm/test/Analysis/MustExecute/loop-header.ll b/llvm/test/Analysis/MustExecute/loop-header.ll index 50efd74b61b11..d632323f94978 100644 --- a/llvm/test/Analysis/MustExecute/loop-header.ll +++ b/llvm/test/Analysis/MustExecute/loop-header.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -disable-output -passes=print-mustexecute %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' %s 2>&1 | FileCheck %s define i1 @header_with_icf(ptr noalias %p, i32 %high) { ; CHECK-LABEL: @header_with_icf( diff --git a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll index f3360f7cd0753..d57a54a84eb6e 100644 --- a/llvm/test/Analysis/MustExecute/must_be_executed_context.ll +++ b/llvm/test/Analysis/MustExecute/must_be_executed_context.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=print-mustexecute -disable-output 2>&1 | FileCheck %s --check-prefix=ME +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s --check-prefix=ME ; RUN: opt < %s -passes=print-must-be-executed-contexts -disable-output 2>&1 | FileCheck %s --check-prefix=MBEC ; ; void simple_conditional(int c) { diff --git a/llvm/test/Analysis/MustExecute/pr57780.ll b/llvm/test/Analysis/MustExecute/pr57780.ll index 4b26cadbb42c5..2f6a7a2831f8e 100644 --- a/llvm/test/Analysis/MustExecute/pr57780.ll +++ b/llvm/test/Analysis/MustExecute/pr57780.ll @@ -1,4 +1,4 @@ -; RUN: opt -disable-output -passes=print-mustexecute < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' < %s 2>&1 | FileCheck %s @c = global i16 0, align 2 diff --git a/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll b/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll index 4b63c036f5491..47b8cc95388b4 100644 --- a/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll +++ b/llvm/test/Analysis/ValueTracking/deref-abstract-gc.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK target datalayout = "e-i32:32:64" diff --git a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll index 8c5216e0c45d9..fc40c68624f2d 100644 --- a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll +++ b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics=false 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL -; RUN: opt -passes=print-memderefs -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK,POINT +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics=false 2>&1 | FileCheck %s --check-prefixes=CHECK,GLOBAL +; RUN: opt -passes='print' -S < %s -disable-output -use-dereferenceable-at-point-semantics 2>&1 | FileCheck %s --check-prefixes=CHECK,POINT ; Uses the print-deref (+ analyze to print) pass to run diff --git a/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll b/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll new file mode 100644 index 0000000000000..cde55522fb19b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/inline-asm-clobber-x29-x30.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck %s + +; Test that both numeric register names (x29, x30) and their architectural +; aliases (fp, lr) work correctly as clobbers in inline assembly. + +define void @clobber_x29() nounwind { +; CHECK-LABEL: clobber_x29: +; CHECK: str x29, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x29, [sp + tail call void asm sideeffect "", "~{x29}"() + ret void +} + +define void @clobber_fp() nounwind { +; CHECK-LABEL: clobber_fp: +; CHECK: str x29, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x29, [sp + tail call void asm sideeffect "", "~{fp}"() + ret void +} + +define void @clobber_x30() nounwind { +; CHECK-LABEL: clobber_x30: +; CHECK: str x30, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x30, [sp + tail call void asm sideeffect "", "~{x30}"() + ret void +} + +define void @clobber_lr() nounwind { +; CHECK-LABEL: clobber_lr: +; CHECK: str x30, [sp +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr x30, [sp + tail call void asm sideeffect "", "~{lr}"() + ret void +} diff --git a/llvm/test/Other/debugcounter-predicateinfo.ll b/llvm/test/Other/debugcounter-predicateinfo.ll index d91b0f8904cb0..57929220c6a28 100644 --- a/llvm/test/Other/debugcounter-predicateinfo.ll +++ b/llvm/test/Other/debugcounter-predicateinfo.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -debug-counter=predicateinfo-rename=1 -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -debug-counter=predicateinfo-rename=1 -passes='print' < %s 2>&1 | FileCheck %s ;; Test that, with debug counters on, we don't rename the first info, only the second define fastcc void @barney() { ; CHECK-LABEL: @barney( diff --git a/llvm/test/Other/print-cfg-scc.ll b/llvm/test/Other/print-cfg-scc.ll index 19ac00e3d7641..31ccd614a1918 100644 --- a/llvm/test/Other/print-cfg-scc.ll +++ b/llvm/test/Other/print-cfg-scc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=print-cfg-sccs -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s ; CHECK: SCC #1: %UnifiedExitNode ; CHECK: SCC #2: %loopexit.5, %loopexit.6, %loopentry.7, %loopentry.6, %loopentry.5, %endif.2 diff --git a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll index 11696cb76c8bb..b7cee46aa6f32 100644 --- a/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll +++ b/llvm/test/Transforms/FunctionSpecialization/ssa-copy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes=print-predicateinfo -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF +; RUN: opt -passes='print' -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=PREDINF ; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ ; RUN: -funcspec-for-literal-constant=true \ ; RUN: -funcspec-min-codesize-savings=50 \ diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index e6de063969a6a..85e8c98455c91 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -1637,3 +1637,58 @@ define <2 x i1> @test_logical_and_icmp_samesign_vec_with_poison_tv(<2 x i8> %x) %and = select <2 x i1> %cmp1, <2 x i1> %cmp2, <2 x i1> zeroinitializer ret <2 x i1> %and } + +define i1 @test_logical_and_trunc_nuw(i1 %c, i8 noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_and_trunc_nuw( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} + +define i1 @test_logical_or_trunc_nuw(i1 %c, i8 noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_or_trunc_nuw( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret i1 [[OR]] +; + %trunc = trunc nuw i8 %x to i1 + %or = select i1 %c, i1 true, i1 %trunc + ret i1 %or +} + +define <2 x i1> @test_logical_and_trunc_nuw_vec(<2 x i1> %c, <2 x i8> noundef range(i8 0,2) %x) { +; CHECK-LABEL: @test_logical_and_trunc_nuw_vec( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw <2 x i8> [[X:%.*]] to <2 x i1> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[C:%.*]], [[TRUNC]] +; CHECK-NEXT: ret <2 x i1> [[AND]] +; + %trunc = trunc nuw <2 x i8> %x to <2 x i1> + %and = select <2 x i1> %c, <2 x i1> %trunc, <2 x i1> zeroinitializer + ret <2 x i1> %and +} + +define i1 @neg_test_logical_and_trunc_nuw_no_range(i1 %c, i8 noundef %x) { +; CHECK-LABEL: @neg_test_logical_and_trunc_nuw_no_range( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C:%.*]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} + +define i1 @neg_test_logical_and_trunc_nuw_no_noundef(i1 %c, i8 range(i8 0,2) %x) { +; CHECK-LABEL: @neg_test_logical_and_trunc_nuw_no_noundef( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc nuw i8 [[X:%.*]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C:%.*]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: ret i1 [[AND]] +; + %trunc = trunc nuw i8 %x to i1 + %and = select i1 %c, i1 %trunc, i1 false + ret i1 %and +} diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 96e74212dce4e..0a98a895f3223 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -epilogue-vectorization-force-VF=4 -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -epilogue-vectorization-force-VF=4 -force-target-supports-masked-memory-ops -S < %s | FileCheck %s define i64 @select_icmp_const(ptr %a, i64 %n) { ; CHECK-LABEL: define i64 @select_icmp_const( @@ -908,3 +908,79 @@ loop: exit: ret i64 %sel } + + +define i32 @predicated_iv_select(ptr %A) { +; CHECK-LABEL: define i32 @predicated_iv_select( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[VECTOR_MAIN_LOOP_ITER_CHECK:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi <8 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[WIDE_LOAD]], splat (i32 -1) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[VEC_IND]], ptr align 4 [[TMP0]], <8 x i1> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5]] = select i1 [[TMP4]], <8 x i1> [[TMP1]], <8 x i1> [[TMP7]] +; CHECK-NEXT: [[TMP6]] = select i1 [[TMP4]], <8 x i32> [[VEC_IND]], <8 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1104 +; CHECK-NEXT: br i1 [[TMP2]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> [[TMP6]], <8 x i1> [[TMP5]], i32 -1) +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1104, %[[VEC_EPILOG_VECTOR_BODY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[TMP8]], %[[VEC_EPILOG_VECTOR_BODY]] ], [ [[LAST_1:%.*]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP15]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label %[[LOOP_THEN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_THEN]]: +; CHECK-NEXT: [[T:%.*]] = trunc nuw nsw i64 [[IV]] to i32 +; CHECK-NEXT: store i32 [[T]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[LAST_1]] = phi i32 [ [[T]], %[[LOOP_THEN]] ], [ [[RED]], %[[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1111 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[LAST_1_LCSSA:%.*]] = phi i32 [ [[LAST_1]], %[[LOOP_LATCH]] ] +; CHECK-NEXT: ret i32 [[LAST_1_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %red = phi i32 [ -1, %entry ], [ %last.1, %loop.latch ] + %gep.A = getelementptr inbounds nuw i32, ptr %A, i64 %iv + %0 = load i32, ptr %gep.A, align 4 + %cmp1 = icmp sgt i32 %0, -1 + br i1 %cmp1, label %loop.then, label %loop.latch + +loop.then: + %t = trunc nuw nsw i64 %iv to i32 + store i32 %t, ptr %gep.A, align 4 + br label %loop.latch + +loop.latch: + %last.1 = phi i32 [ %t, %loop.then ], [ %red, %loop.header ] + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1111 + br i1 %ec, label %exit, label %loop.header + +exit: + ret i32 %last.1 +} diff --git a/llvm/test/Transforms/LoopVectorize/pr25281.ll b/llvm/test/Transforms/LoopVectorize/pr25281.ll index 06031d7e925af..4b8369936123c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr25281.ll +++ b/llvm/test/Transforms/LoopVectorize/pr25281.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -aa-pipeline=scev-aa -passes=loop-vectorize,print-alias-sets -S -o - 2>&1 | FileCheck %s +; RUN: opt < %s -aa-pipeline=scev-aa -passes='loop-vectorize,print' -S -o - 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; PR25281 diff --git a/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll b/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll index ab23d3354da66..646e5383fade4 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/assume-operand-bundles.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -S -passes=print-predicateinfo %s 2>&1 >/dev/null | FileCheck %s +; RUN: opt -S -passes='print' %s 2>&1 >/dev/null | FileCheck %s declare void @use(i1) diff --git a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll index f024106b7299a..8feec397ac545 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/branch-on-same-cond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -S -passes=print-predicateinfo < %s 2>&1 >/dev/null | FileCheck %s +; RUN: opt -S -passes='print' < %s 2>&1 >/dev/null | FileCheck %s ; FIXME: RenamedOp should be %cmp or %x in all cases here, ; which is the value used in the condition. diff --git a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll index 42e8ccb760b3f..e9e36c7a2fdc6 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/condprop.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/condprop.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s @a = external global i32 ; [#uses=7] diff --git a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll index 06c02d699c511..4e4ee0f2b09bd 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/diamond.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/diamond.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s define i1 @f(i32 %x, i1 %y) { ; CHECK-LABEL: @f( ; CHECK-NEXT: br i1 [[Y:%.*]], label [[BB0:%.*]], label [[BB1:%.*]] diff --git a/llvm/test/Transforms/Util/PredicateInfo/edge.ll b/llvm/test/Transforms/Util/PredicateInfo/edge.ll index 913832696215e..de18a7ad4474f 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/edge.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/edge.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s define i32 @f1(i32 %x) { ; CHECK-LABEL: @f1( diff --git a/llvm/test/Transforms/Util/PredicateInfo/ordering.ll b/llvm/test/Transforms/Util/PredicateInfo/ordering.ll index d7ce9dc652c8b..3a1cdf1ee955d 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/ordering.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/ordering.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -passes=print-predicateinfo -debug < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -debug < %s 2>&1 | FileCheck %s declare void @use(i32) declare void @use.i1(i1) diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll index 4762d376ef5aa..3f26f437c860a 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33456.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. @a = global i32 1, align 4 @d = common global i32 0, align 4 diff --git a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll index e4fd4cc6dd8a2..138297ccdb4b5 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/pr33457.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s ; Don't insert predicate info for conditions with a single target. @a = global i32 6, align 4 @c = global i32 -1, align 4 diff --git a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll index d29aadd54128c..bee4fcd292427 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-inst-comments -; RUN: opt -passes=print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' -disable-output < %s 2>&1 | FileCheck %s declare void @foo(i1) declare void @bar(i32) diff --git a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll index faf4bec61c935..0f1685f942fa1 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unnamed-types.ll @@ -1,4 +1,4 @@ -; RUN: opt -disable-output -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -disable-output -passes='print' < %s 2>&1 | FileCheck %s %1 = type opaque %0 = type opaque diff --git a/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll b/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll index 99314d45913e0..05536b0962906 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/unreachable.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -passes='print' < %s 2>&1 | FileCheck %s declare void @foo() declare void @llvm.assume(i1) diff --git a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp index 01285c6c0ec09..265aca75fae36 100644 --- a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp +++ b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp @@ -15,9 +15,11 @@ #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Matchers.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FormatVariadic.h" #define DEBUG_TYPE "math-to-spirv-pattern" @@ -360,75 +362,81 @@ struct PowFOpPattern final : public OpConversionPattern { if (!dstType) return failure(); - // Get the scalar float type. - FloatType scalarFloatType; - if (auto scalarType = dyn_cast(powfOp.getType())) { - scalarFloatType = scalarType; - } else if (auto vectorType = dyn_cast(powfOp.getType())) { - scalarFloatType = cast(vectorType.getElementType()); - } else { - return failure(); + Location loc = powfOp.getLoc(); + Type operandType = adaptor.getRhs().getType(); + + // Parity-based lowering requires an integer-valued constant exponent. + // Otherwise fall back to exp(y*log(x)), which yields NaN for x<0 (matches + // C). + auto isOdd = [](const APFloat &v) { + APSInt i(/*BitWidth=*/64, /*isUnsigned=*/false); + bool ignored; + v.convertToInteger(i, APFloat::rmTowardZero, &ignored); + return i[0]; + }; + + SmallVector oddMask; + Attribute rhsAttr; + if (matchPattern(adaptor.getRhs(), m_Constant(&rhsAttr))) { + TypeSwitch(rhsAttr) + .Case([&](FloatAttr a) { + if (a.getValue().isInteger()) + oddMask.push_back(isOdd(a.getValue())); + }) + .Case([&](SplatElementsAttr a) { + APFloat splat = a.getSplatValue(); + if (splat.isInteger()) + oddMask.push_back(isOdd(splat)); + }) + .Case([&](DenseElementsAttr a) { + SmallVector mask; + for (const APFloat &elt : a.getValues()) { + if (!elt.isInteger()) + return; + mask.push_back(isOdd(elt)); + } + oddMask = std::move(mask); + }); } - // Get int type of the same shape as the float type. - Type scalarIntType = rewriter.getIntegerType(32); - Type intType = scalarIntType; - auto operandType = adaptor.getRhs().getType(); - if (auto vectorType = dyn_cast(operandType)) { - auto shape = vectorType.getShape(); - intType = VectorType::get(shape, scalarIntType); + if (oddMask.empty()) { + Value log = spirv::GLLogOp::create(rewriter, loc, adaptor.getLhs()); + Value mul = spirv::FMulOp::create(rewriter, loc, adaptor.getRhs(), log); + rewriter.replaceOpWithNewOp(powfOp, mul); + return success(); + } + + // GL.Pow is undefined for x < 0; take abs and conditionally negate the + // result for lanes whose exponent is odd. + Value abs = spirv::GLFAbsOp::create(rewriter, loc, adaptor.getLhs()); + Value pow = spirv::GLPowOp::create(rewriter, loc, abs, adaptor.getRhs()); + + // No odd-parity element: result has the same sign as |lhs|^rhs >= 0. + if (llvm::none_of(oddMask, [](bool b) { return b; })) { + rewriter.replaceOp(powfOp, pow); + return success(); } - // Per GL Pow extended instruction spec: - // "Result is undefined if x < 0. Result is undefined if x = 0 and y <= 0." - Location loc = powfOp.getLoc(); Value zero = spirv::ConstantOp::getZero(operandType, loc, rewriter); Value lessThan = spirv::FOrdLessThanOp::create(rewriter, loc, adaptor.getLhs(), zero); - - // Per C/C++ spec: - // > pow(base, exponent) returns NaN (and raises FE_INVALID) if base is - // > finite and negative and exponent is finite and non-integer. - // Calculate the reminder from the exponent and check whether it is zero. - Value floatOne = spirv::ConstantOp::getOne(operandType, loc, rewriter); - Value expRem = - spirv::FRemOp::create(rewriter, loc, adaptor.getRhs(), floatOne); - Value expRemNonZero = - spirv::FOrdNotEqualOp::create(rewriter, loc, expRem, zero); - Value cmpNegativeWithFractionalExp = - spirv::LogicalAndOp::create(rewriter, loc, expRemNonZero, lessThan); - // Create NaN result and replace base value if conditions are met. - const auto &floatSemantics = scalarFloatType.getFloatSemantics(); - const auto nan = APFloat::getNaN(floatSemantics); - Attribute nanAttr = rewriter.getFloatAttr(scalarFloatType, nan); - if (auto vectorType = dyn_cast(operandType)) - nanAttr = DenseElementsAttr::get(vectorType, nan); - - Value nanValue = - spirv::ConstantOp::create(rewriter, loc, operandType, nanAttr); - Value lhs = - spirv::SelectOp::create(rewriter, loc, cmpNegativeWithFractionalExp, - nanValue, adaptor.getLhs()); - Value abs = spirv::GLFAbsOp::create(rewriter, loc, lhs); - - // TODO: The following just forcefully casts y into an integer value in - // order to properly propagate the sign, assuming integer y cases. It - // doesn't cover other cases and should be fixed. - - // Cast exponent to integer and calculate exponent % 2 != 0. - Value intRhs = - spirv::ConvertFToSOp::create(rewriter, loc, intType, adaptor.getRhs()); - Value intOne = spirv::ConstantOp::getOne(intType, loc, rewriter); - Value bitwiseAndOne = - spirv::BitwiseAndOp::create(rewriter, loc, intRhs, intOne); - Value isOdd = spirv::IEqualOp::create(rewriter, loc, bitwiseAndOne, intOne); - - // calculate pow based on abs(lhs)^rhs. - Value pow = spirv::GLPowOp::create(rewriter, loc, abs, adaptor.getRhs()); Value negate = spirv::FNegateOp::create(rewriter, loc, pow); - // if the exponent is odd and lhs < 0, negate the result. - Value shouldNegate = - spirv::LogicalAndOp::create(rewriter, loc, lessThan, isOdd); + + Value shouldNegate; + if (llvm::all_equal(oddMask)) { + // Every lane has odd exponent: negate iff lhs < 0. + shouldNegate = lessThan; + } else { + // Mixed parity (non-splat dense vector): AND lhs<0 with a per-element + // constant odd-mask. + auto vecType = cast(operandType); + auto maskType = VectorType::get(vecType.getShape(), rewriter.getI1Type()); + Value oddConst = spirv::ConstantOp::create( + rewriter, loc, maskType, DenseElementsAttr::get(maskType, oddMask)); + shouldNegate = + spirv::LogicalAndOp::create(rewriter, loc, lessThan, oddConst); + } + rewriter.replaceOpWithNewOp(powfOp, shouldNegate, negate, pow); return success(); diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir index 8eb533eeff2a9..08d7822d04cc1 100644 --- a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir +++ b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir @@ -183,45 +183,98 @@ func.func @ctlz_vector2(%val: vector<2xi32>) -> vector<2xi32> { return %0 : vector<2xi32> } +// Dynamic exponent: exp(y * log(x)); yields NaN for x<0. // CHECK-LABEL: @powf_scalar // CHECK-SAME: (%[[LHS:.+]]: f32, %[[RHS:.+]]: f32) func.func @powf_scalar(%lhs: f32, %rhs: f32) -> f32 { + // CHECK: %[[LOG:.+]] = spirv.GL.Log %[[LHS]] : f32 + // CHECK: %[[MUL:.+]] = spirv.FMul %[[RHS]], %[[LOG]] : f32 + // CHECK: %[[EXP:.+]] = spirv.GL.Exp %[[MUL]] : f32 + %0 = math.powf %lhs, %rhs : f32 + // CHECK: return %[[EXP]] + return %0: f32 +} + +// CHECK-LABEL: @powf_vector +func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { + // CHECK: spirv.GL.Log %{{.*}} : vector<4xf32> + // CHECK: spirv.FMul %{{.*}} : vector<4xf32> + // CHECK: spirv.GL.Exp %{{.*}} : vector<4xf32> + %0 = math.powf %lhs, %rhs : vector<4xf32> + return %0: vector<4xf32> +} + +// Constant odd integer exponent: parity is known statically, so the lowering +// drops the runtime FToS/BitwiseAnd/IEqual/LogicalAnd parity computation. +// CHECK-LABEL: @powf_const_odd_int_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_odd_int_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 3.000000e+00 : f32 + // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[LHS]] : f32 + // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 // CHECK: %[[F0:.+]] = spirv.Constant 0.000000e+00 : f32 // CHECK: %[[LT:.+]] = spirv.FOrdLessThan %[[LHS]], %[[F0]] : f32 - // CHECK: %[[F1:.+]] = spirv.Constant 1.000000e+00 : f32 - // CHECK: %[[REM:.+]] = spirv.FRem %[[RHS]], %[[F1]] : f32 - // CHECK: %[[IS_FRACTION:.+]] = spirv.FOrdNotEqual %[[REM]], %[[F0]] : f32 - // CHECK: %[[AND:.+]] = spirv.LogicalAnd %[[IS_FRACTION]], %[[LT]] : i1 - // CHECK: %[[NAN:.+]] = spirv.Constant 0x7FC00000 : f32 - // CHECK: %[[NEW_LHS:.+]] = spirv.Select %[[AND]], %[[NAN]], %[[LHS]] : i1, f32 - // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[NEW_LHS]] : f32 - // CHECK: %[[IRHS:.+]] = spirv.ConvertFToS - // CHECK: %[[CST1:.+]] = spirv.Constant 1 : i32 - // CHECK: %[[REM:.+]] = spirv.BitwiseAnd %[[IRHS]] - // CHECK: %[[ODD:.+]] = spirv.IEqual %[[REM]], %[[CST1]] : i32 - // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 // CHECK: %[[NEG:.+]] = spirv.FNegate %[[POW]] : f32 - // CHECK: %[[SNEG:.+]] = spirv.LogicalAnd %[[LT]], %[[ODD]] : i1 - // CHECK: %[[SEL:.+]] = spirv.Select %[[SNEG]], %[[NEG]], %[[POW]] : i1, f32 - %0 = math.powf %lhs, %rhs : f32 + // CHECK: %[[SEL:.+]] = spirv.Select %[[LT]], %[[NEG]], %[[POW]] : i1, f32 + %c = arith.constant 3.0 : f32 + %0 = math.powf %lhs, %c : f32 // CHECK: return %[[SEL]] return %0: f32 } -// CHECK-LABEL: @powf_vector -func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { +// Constant even integer exponent: result is non-negative, no select needed. +// CHECK-LABEL: @powf_const_even_int_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_even_int_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 4.000000e+00 : f32 + // CHECK: %[[ABS:.+]] = spirv.GL.FAbs %[[LHS]] : f32 + // CHECK: %[[POW:.+]] = spirv.GL.Pow %[[ABS]], %[[RHS]] : f32 + %c = arith.constant 4.0 : f32 + %0 = math.powf %lhs, %c : f32 + // CHECK: return %[[POW]] + return %0: f32 +} + +// Constant non-integer exponent: falls into the dynamic exp(y*log(x)) path. +// CHECK-LABEL: @powf_const_frac_exp +// CHECK-SAME: (%[[LHS:.+]]: f32) +func.func @powf_const_frac_exp(%lhs: f32) -> f32 { + // CHECK: %[[RHS:.+]] = arith.constant 2.500000e+00 : f32 + // CHECK: %[[LOG:.+]] = spirv.GL.Log %[[LHS]] : f32 + // CHECK: %[[MUL:.+]] = spirv.FMul %[[RHS]], %[[LOG]] : f32 + // CHECK: %[[EXP:.+]] = spirv.GL.Exp %[[MUL]] : f32 + %c = arith.constant 2.5 : f32 + %0 = math.powf %lhs, %c : f32 + // CHECK: return %[[EXP]] + return %0: f32 +} + +// Splat constant odd integer-valued vector exponent: uniform odd parity. +// CHECK-LABEL: @powf_const_odd_int_exp_vector +func.func @powf_const_odd_int_exp_vector(%lhs: vector<4xf32>) -> vector<4xf32> { + // CHECK: spirv.GL.FAbs + // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> // CHECK: spirv.FOrdLessThan - // CHECK: spirv.FRem - // CHECK: spirv.FOrdNotEqual - // CHECK: spirv.LogicalAnd + // CHECK: spirv.FNegate // CHECK: spirv.Select + %c = arith.constant dense<3.0> : vector<4xf32> + %0 = math.powf %lhs, %c : vector<4xf32> + return %0: vector<4xf32> +} + +// Mixed-parity constant integer-valued vector exponent: per-element odd-mask +// constant is materialized and AND-ed with lhs<0. +// CHECK-LABEL: @powf_const_mixed_int_exp_vector +func.func @powf_const_mixed_int_exp_vector(%lhs: vector<4xf32>) -> vector<4xf32> { // CHECK: spirv.GL.FAbs - // CHECK: spirv.BitwiseAnd %{{.*}} : vector<4xi32> - // CHECK: spirv.IEqual %{{.*}} : vector<4xi32> // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> + // CHECK: spirv.FOrdLessThan // CHECK: spirv.FNegate + // CHECK: %[[ODD:.+]] = spirv.Constant dense<[true, false, true, false]> : vector<4xi1> + // CHECK: spirv.LogicalAnd %{{.*}}, %[[ODD]] : vector<4xi1> // CHECK: spirv.Select - %0 = math.powf %lhs, %rhs : vector<4xf32> + %c = arith.constant dense<[3.0, 2.0, 5.0, 4.0]> : vector<4xf32> + %0 = math.powf %lhs, %c : vector<4xf32> return %0: vector<4xf32> }