Skip to content

Commit afc5154

Browse files
Fix GPU pipeline and pass lowering (#41)
* Fix optimization pipeline for GPU target regions
  - Use the old pass manager available through llvmlite 0.44
  - Separate linking of libdevice and the OpenMP device RTL
  - Optimize with opt=2 with libdevice
  - Optimize with opt=1 with device RTL to avoid miscompilation and OpenMP synchronization errors
* Remove unsafe inttoptr for scalar firstprivate
  - Pass original values to copy as pointers to avoid ABI issues
* Remove unsafe parameter attributes
* Fix init/fini for the pass manager with function passes
1 parent 2e2e22c commit afc5154

2 files changed

Lines changed: 85 additions & 217 deletions

File tree

src/numba/openmp/libs/pass/CGIntrinsicsOpenMP.cpp

Lines changed: 14 additions & 163 deletions
Original file line number | Diff line number | Diff line change
@@ -327,17 +327,8 @@ OutlinedInfoStruct CGIntrinsicsOpenMP::createOutlinedFunction(
327327
Params.push_back(OMPBuilder.Int32Ptr);
328328
for (auto *V : CapturedShared)
329329
Params.push_back(V->getType());
330-
for (auto *V : CapturedFirstprivate) {
331-
Type *VPtrElemTy = getPointeeType(DSAValueMap, V);
332-
if (VPtrElemTy->isSingleValueType())
333-
// TODO: The OpenMP runtime expects and propagates arguments
334-
// typed as Int64, thus we cast byval firstprivates to Int64. Using an
335-
// aggregate to store arguments would avoid this peculiarity.
336-
// Params.push_back(VPtrElemTy);
337-
Params.push_back(OMPBuilder.Int64);
338-
else
339-
Params.push_back(V->getType());
340-
}
330+
for (auto *V : CapturedFirstprivate)
331+
Params.push_back(V->getType());
341332
for (auto *V : Reductions)
342333
Params.push_back(V->getType());
343334

@@ -359,32 +350,16 @@ OutlinedInfoStruct CGIntrinsicsOpenMP::createOutlinedFunction(
359350
// globalized pointer.
360351
if (DeviceGlobalizedValues.contains(V))
361352
DeviceGlobalizedValues.insert(AI);
362-
363-
OutlinedFn->addParamAttr(arg_no, Attribute::NonNull);
364-
OutlinedFn->addParamAttr(
365-
arg_no, Attribute::get(M.getContext(), Attribute::Dereferenceable, 8));
366353
++AI;
367354
++arg_no;
368355
}
369356
for (auto *V : CapturedFirstprivate) {
370-
Type *VPtrElemTy = getPointeeType(DSAValueMap, V);
371-
if (VPtrElemTy->isSingleValueType()) {
372-
AI->setName(V->getName() + ".firstprivate.byval");
373-
} else {
374-
AI->setName(V->getName() + ".firstprivate");
375-
OutlinedFn->addParamAttr(arg_no, Attribute::NonNull);
376-
OutlinedFn->addParamAttr(
377-
arg_no,
378-
Attribute::get(M.getContext(), Attribute::Dereferenceable, 8));
379-
}
357+
AI->setName(V->getName() + ".firstprivate");
380358
++AI;
381359
++arg_no;
382360
}
383361
for (auto *V : Reductions) {
384362
AI->setName(V->getName() + ".red");
385-
OutlinedFn->addParamAttr(arg_no, Attribute::NonNull);
386-
OutlinedFn->addParamAttr(
387-
arg_no, Attribute::get(M.getContext(), Attribute::Dereferenceable, 8));
388363
++AI;
389364
++arg_no;
390365
}
@@ -463,27 +438,14 @@ OutlinedInfoStruct CGIntrinsicsOpenMP::createOutlinedFunction(
463438
Type *VPtrElemTy = getPointeeType(DSAValueMap, V);
464439
Value *ReplacementValue =
465440
CreateAllocaAtEntry(VPtrElemTy, nullptr, V->getName() + ".copy");
466-
if (VPtrElemTy->isSingleValueType()) {
467-
// TODO: The OpenMP runtime expects and propagates arguments
468-
// typed as Int64, thus we cast byval firstprivates to Int64. Using an
469-
// aggregate to store arguments would avoid this peculiarity.
470-
// OMPBuilder.Builder.CreateStore(AI, ReplacementValue);
471-
Value *Alloca = CreateAllocaAtEntry(OMPBuilder.Int64);
472-
473-
OMPBuilder.Builder.CreateStore(AI, Alloca);
474-
Value *BitCast = OMPBuilder.Builder.CreateBitCast(Alloca, V->getType());
475-
Value *Load = OMPBuilder.Builder.CreateLoad(VPtrElemTy, BitCast);
441+
Value *Load =
442+
OMPBuilder.Builder.CreateLoad(VPtrElemTy, AI, V->getName() + ".reload");
443+
FunctionCallee CopyConstructor = DSAValueMap[V].CopyConstructor;
444+
if (CopyConstructor) {
445+
Value *Copy = OMPBuilder.Builder.CreateCall(CopyConstructor, {Load});
446+
OMPBuilder.Builder.CreateStore(Copy, ReplacementValue);
447+
} else
476448
OMPBuilder.Builder.CreateStore(Load, ReplacementValue);
477-
} else {
478-
Value *Load = OMPBuilder.Builder.CreateLoad(VPtrElemTy, AI,
479-
V->getName() + ".reload");
480-
FunctionCallee CopyConstructor = DSAValueMap[V].CopyConstructor;
481-
if (CopyConstructor) {
482-
Value *Copy = OMPBuilder.Builder.CreateCall(CopyConstructor, {Load});
483-
OMPBuilder.Builder.CreateStore(Copy, ReplacementValue);
484-
} else
485-
OMPBuilder.Builder.CreateStore(Load, ReplacementValue);
486-
}
487449

488450
if (VMap)
489451
(*VMap)[V] = ReplacementValue;
@@ -643,25 +605,8 @@ void CGIntrinsicsOpenMP::emitOMPParallelHostRuntime(
643605
ForkArgs.append({Ident, OMPBuilder.Builder.getInt32(CapturedVars.size()),
644606
OutlinedFnCast});
645607

646-
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx) {
647-
Type *VPtrElemTy = getPointeeType(DSAValueMap, CapturedVars[Idx]);
648-
// Pass firstprivate scalar by value.
649-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE &&
650-
VPtrElemTy->isSingleValueType()) {
651-
// TODO: check type conversions.
652-
Value *Alloca = OMPBuilder.Builder.CreateAlloca(OMPBuilder.Int64);
653-
Value *LoadV =
654-
OMPBuilder.Builder.CreateLoad(VPtrElemTy, CapturedVars[Idx]);
655-
Value *BitCast = OMPBuilder.Builder.CreateBitCast(
656-
Alloca, CapturedVars[Idx]->getType());
657-
OMPBuilder.Builder.CreateStore(LoadV, BitCast);
658-
Value *Load = OMPBuilder.Builder.CreateLoad(OMPBuilder.Int64, Alloca);
659-
ForkArgs.push_back(Load);
660-
continue;
661-
}
662-
608+
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx)
663609
ForkArgs.push_back(CapturedVars[Idx]);
664-
}
665610

666611
OMPBuilder.Builder.CreateCall(ForkCall, ForkArgs);
667612
};
@@ -681,20 +626,8 @@ void CGIntrinsicsOpenMP::emitOMPParallelHostRuntime(
681626
ZeroAddr);
682627
// Zero for thread id, bound tid.
683628
SmallVector<Value *, 16> OutlinedArgs = {ZeroAddr, ZeroAddr};
684-
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx) {
685-
Type *VPtrElemTy = getPointeeType(DSAValueMap, CapturedVars[Idx]);
686-
// Pass firstprivate scalar by value.
687-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE &&
688-
VPtrElemTy->isSingleValueType()) {
689-
// TODO: check type conversions.
690-
Value *Load =
691-
OMPBuilder.Builder.CreateLoad(VPtrElemTy, CapturedVars[Idx]);
692-
OutlinedArgs.push_back(Load);
693-
continue;
694-
}
695-
629+
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx)
696630
OutlinedArgs.push_back(CapturedVars[Idx]);
697-
}
698631

699632
OMPBuilder.Builder.CreateCall(OutlinedFn, OutlinedArgs);
700633

@@ -809,29 +742,6 @@ void CGIntrinsicsOpenMP::emitOMPParallelDeviceRuntime(
809742
Value *GEP = OMPBuilder.Builder.CreateConstInBoundsGEP1_64(
810743
OMPBuilder.Int8Ptr, LoadGlobalArgs, Idx);
811744

812-
// Pass firstprivate scalar by value.
813-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE) {
814-
Type *VPtrElemTy = getPointeeType(DSAValueMap, CapturedVars[Idx]);
815-
if (VPtrElemTy->isSingleValueType()) {
816-
Value *Bitcast =
817-
OMPBuilder.Builder.CreateBitCast(GEP, CapturedVars[Idx]->getType());
818-
Value *Load = OMPBuilder.Builder.CreateLoad(VPtrElemTy, Bitcast);
819-
// TODO: Runtime expects values in Int64 type, fix with arguments in
820-
// struct.
821-
AllocaInst *TmpInt64 = OMPBuilder.Builder.CreateAlloca(
822-
OMPBuilder.Int64, nullptr,
823-
CapturedVars[Idx]->getName() + "fpriv.byval");
824-
Value *Cast = OMPBuilder.Builder.CreateBitCast(
825-
TmpInt64, CapturedVars[Idx]->getType());
826-
OMPBuilder.Builder.CreateStore(Load, Cast);
827-
Value *ConvLoad =
828-
OMPBuilder.Builder.CreateLoad(OMPBuilder.Int64, TmpInt64);
829-
OutlinedFnArgs.push_back(ConvLoad);
830-
831-
continue;
832-
}
833-
}
834-
835745
Value *Bitcast = OMPBuilder.Builder.CreateBitCast(
836746
GEP, CapturedVars[Idx]->getType()->getPointerTo());
837747
Value *Load =
@@ -892,21 +802,6 @@ void CGIntrinsicsOpenMP::emitOMPParallelDeviceRuntime(
892802
Value *GEP = OMPBuilder.Builder.CreateConstInBoundsGEP2_64(
893803
CapturedVarsAddrsTy, CapturedVarsAddrs, 0, Idx);
894804

895-
// Pass firstprivate scalar by value.
896-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE) {
897-
if (getPointeeType(DSAValueMap, CapturedVars[Idx])->isSingleValueType()) {
898-
// TODO: check type conversions.
899-
Value *BitCast = OMPBuilder.Builder.CreateBitCast(CapturedVars[Idx],
900-
OMPBuilder.Int64Ptr);
901-
Value *Load = OMPBuilder.Builder.CreateLoad(OMPBuilder.Int64, BitCast);
902-
Value *IntToPtr =
903-
OMPBuilder.Builder.CreateIntToPtr(Load, OMPBuilder.Int8Ptr);
904-
OMPBuilder.Builder.CreateStore(IntToPtr, GEP);
905-
906-
continue;
907-
}
908-
}
909-
910805
// Allocate from global memory if the pointer is not globalized (not in the
911806
// global address space).
912807
FunctionCallee KmpcAllocShared =
@@ -2660,30 +2555,8 @@ void CGIntrinsicsOpenMP::emitOMPTeamsDeviceRuntime(
26602555
SmallVector<Value *, 8> Args;
26612556
Args.append({ThreadIDAddr, ZeroAddr});
26622557

2663-
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx) {
2664-
// Pass firstprivate scalar by value.
2665-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE) {
2666-
Type *VPtrElemTy = getPointeeType(DSAValueMap, CapturedVars[Idx]);
2667-
if (VPtrElemTy->isSingleValueType()) {
2668-
Value *Load =
2669-
OMPBuilder.Builder.CreateLoad(VPtrElemTy, CapturedVars[Idx]);
2670-
// TODO: Runtime expects values in Int64 type, fix with arguments in
2671-
// struct.
2672-
AllocaInst *TmpInt64 = OMPBuilder.Builder.CreateAlloca(
2673-
OMPBuilder.Int64, nullptr,
2674-
CapturedVars[Idx]->getName() + "fpriv.byval");
2675-
Value *Cast = OMPBuilder.Builder.CreateBitCast(
2676-
TmpInt64, CapturedVars[Idx]->getType());
2677-
OMPBuilder.Builder.CreateStore(Load, Cast);
2678-
Value *ConvLoad =
2679-
OMPBuilder.Builder.CreateLoad(OMPBuilder.Int64, TmpInt64);
2680-
Args.push_back(ConvLoad);
2681-
2682-
continue;
2683-
}
2684-
}
2558+
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx)
26852559
Args.push_back(CapturedVars[Idx]);
2686-
}
26872560

26882561
auto *CI = checkCreateCall(OMPBuilder.Builder, TeamsOutlinedFn, Args);
26892562
assert(CI && "Expected valid call");
@@ -2766,30 +2639,8 @@ void CGIntrinsicsOpenMP::emitOMPTeamsHostRuntime(
27662639
OMPBuilder.Builder.CreateBitCast(OutlinedFn,
27672640
OMPBuilder.ParallelTaskPtr)});
27682641

2769-
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx) {
2770-
// Pass firstprivate scalar by value.
2771-
if (DSAValueMap[CapturedVars[Idx]].Type == DSA_FIRSTPRIVATE) {
2772-
Type *VPtrElemTy = getPointeeType(DSAValueMap, CapturedVars[Idx]);
2773-
if (VPtrElemTy->isSingleValueType()) {
2774-
Value *Load =
2775-
OMPBuilder.Builder.CreateLoad(VPtrElemTy, CapturedVars[Idx]);
2776-
// TODO: Runtime expects values in Int64 type, fix with arguments in
2777-
// struct.
2778-
AllocaInst *TmpInt64 = OMPBuilder.Builder.CreateAlloca(
2779-
OMPBuilder.Int64, nullptr,
2780-
CapturedVars[Idx]->getName() + ".fpriv.byval");
2781-
Value *Cast = OMPBuilder.Builder.CreateBitCast(
2782-
TmpInt64, CapturedVars[Idx]->getType());
2783-
OMPBuilder.Builder.CreateStore(Load, Cast);
2784-
Value *ConvLoad =
2785-
OMPBuilder.Builder.CreateLoad(OMPBuilder.Int64, TmpInt64);
2786-
Args.push_back(ConvLoad);
2787-
2788-
continue;
2789-
}
2790-
}
2642+
for (size_t Idx = 0; Idx < CapturedVars.size(); ++Idx)
27912643
Args.push_back(CapturedVars[Idx]);
2792-
}
27932644

27942645
auto *CI = checkCreateCall(OMPBuilder.Builder, ForkTeams, Args);
27952646
assert(CI && "Expected valid call");

0 commit comments

Comments
 (0)