11from numba .core import (
22 ir ,
33 types ,
4- cgutils ,
54 typing ,
65 transforms ,
76 bytecode ,
@@ -85,14 +84,12 @@ def openmp_region_alloca(obj, alloca_instr, typ):
8584
8685
def push_alloca_callback(lowerer, callback, data, builder):
    """Record one more active alloca callback on ``builder``.

    The nesting depth is tracked in the ``_lowerer_push_alloca_callbacks``
    attribute stored directly on ``builder``; it is created with value 0 the
    first time this function sees a given builder and then incremented.

    Args:
        lowerer: Accepted for interface compatibility; not used here.
        callback: Accepted for interface compatibility; not used here.
        data: Accepted for interface compatibility; not used here.
        builder: Object on which the counter attribute is kept.
    """
    # Lazily create the counter so any builder object can be used.
    if not hasattr(builder, "_lowerer_push_alloca_callbacks"):
        builder._lowerer_push_alloca_callbacks = 0
    builder._lowerer_push_alloca_callbacks += 1
9290
9391
def pop_alloca_callback(lowerer, builder):
    """Record that one alloca callback on ``builder`` is no longer active.

    Decrements the ``_lowerer_push_alloca_callbacks`` counter stored on
    ``builder``. The counter is not created here, so the attribute must
    already exist on ``builder``.

    Args:
        lowerer: Accepted for interface compatibility; not used here.
        builder: Object carrying the counter attribute.

    Raises:
        AttributeError: If ``builder`` has no
            ``_lowerer_push_alloca_callbacks`` attribute.
    """
    builder._lowerer_push_alloca_callbacks -= 1
9794
9895
@@ -865,16 +862,6 @@ def alloca(self, alloca_instr, typ):
865862 # process them then they won't lead to infinite recursion.
866863 self .alloca_queue .append ((alloca_instr , typ ))
867864
868- def process_alloca_queue (self ):
869- # This should be old code...making sure with the assertion.
870- assert len (self .alloca_queue ) == 0
871- has_update = False
872- for alloca_instr , typ in self .alloca_queue :
873- has_update = self .process_one_alloca (alloca_instr , typ ) or has_update
874- if has_update :
875- self .update_tags ()
876- self .alloca_queue = []
877-
878865 def post_lowering_process_alloca_queue (self , enter_directive ):
879866 has_update = False
880867 if DEBUG_OPENMP >= 1 :
@@ -951,7 +938,6 @@ def needs_implicit_vars(self):
951938
952939 def update_context (self , context , builder ):
953940 cctyp = type (context .call_conv )
954- # print("start update_context id(context)", id(context), "id(const.call_conv)", id(context.call_conv), "cctyp", cctyp, "id(cctyp)", id(cctyp))
955941
956942 if (
957943 not hasattr (cctyp , "pyomp_patch_installed" )
@@ -974,7 +960,6 @@ def pyomp_return_user_exc(self, builder, *args, **kwargs):
974960 self .orig_return_user_exc (builder , * args , ** kwargs )
975961
976962 setattr (cctyp , "return_user_exc" , pyomp_return_user_exc )
977- # print("after", id(pyomp_return_user_exc), id(cctyp.return_user_exc))
978963
979964 setattr (
980965 cctyp , "orig_return_status_propagate" , cctyp .return_status_propagate
@@ -988,14 +973,12 @@ def pyomp_return_status_propagate(self, builder, *args, **kwargs):
988973 setattr (cctyp , "return_status_propagate" , pyomp_return_status_propagate )
989974
990975 cemtyp = type (context .error_model )
991- # print("start update_context id(context)", id(context), "id(const.error_model)", id(context.error_model), "cemtyp", cemtyp, "id(cemtyp)", id(cemtyp))
992976
993977 if (
994978 not hasattr (cemtyp , "pyomp_patch_installed" )
995979 or not cemtyp .pyomp_patch_installed
996980 ):
997981 cemtyp .pyomp_patch_installed = True
998- # print("update_context", "id(cemtyp.return_user_exec)", id(cemtyp.fp_zero_division), "id(context)", id(context))
999982 setattr (cemtyp , "orig_fp_zero_division" , cemtyp .fp_zero_division )
1000983
1001984 def pyomp_fp_zero_division (self , builder , * args , ** kwargs ):
@@ -1031,10 +1014,6 @@ def fix_dispatchers(self, typemap, typingctx, cuda_target):
10311014 if isinstance (v , Dispatcher ) and not isinstance (
10321015 v , numba_cuda .types .CUDADispatcher
10331016 ):
1034- # targetoptions = v.targetoptions.copy()
1035- # targetoptions['device'] = True
1036- # targetoptions['debug'] = targetoptions.get('debug', False)
1037- # targetoptions['opt'] = targetoptions.get('opt', True)
10381017 vdispatcher = v .dispatcher
10391018 vdispatcher .targetoptions .pop ("nopython" , None )
10401019 vdispatcher .targetoptions .pop ("boundscheck" , None )
@@ -1103,7 +1082,6 @@ def lower(self, lowerer):
11031082 )
11041083 )
11051084 assert count_alloca_instr == 0
1106- # self.tags = list(filter(lambda x: not isinstance(x.arg, lir.instructions.AllocaInstr), self.tags))
11071085 if DEBUG_OPENMP >= 1 :
11081086 print ("after LLVM tag filter" , self .tags , len (self .tags ))
11091087 for otag in self .tags :
@@ -1308,7 +1286,9 @@ def add_struct_tags(self, var_table):
13081286 if isinstance (device_tag .arg , int ):
13091287 selected_device = device_tag .arg
13101288 else :
1311- assert False
1289+ raise ValueError (
1290+ "Device tag argument must be an integer device number."
1291+ )
13121292 if DEBUG_OPENMP >= 1 :
13131293 print ("new selected device:" , selected_device )
13141294 else :
@@ -1412,7 +1392,6 @@ def fixup_openmp_pairs(blocks):
14121392 start_region .tags .append (openmp_tag ("OMP.DEVICE" ))
14131393 end_region = blocks [end_block ].body [ebindex ]
14141394 # assert(start_region.omp_region_var is None)
1415- assert len (start_region .alloca_queue ) == 0
14161395 # Make start and end copies point at each other.
14171396 end_region .start_region = start_region
14181397 start_region .end_region = end_region
@@ -1502,16 +1481,9 @@ def add_mapped_to_ins(ins, tags):
15021481 if DEBUG_OPENMP >= 1 :
15031482 print ("region_info:" , region_info )
15041483 transforms ._loop_lift_prepare_loop_func (region_info , region_blocks )
1505- # exit_block_label = max(region_blocks.keys())
1506- # region_blocks[exit_block_label].body = []
1507- # exit_scope = region_blocks[exit_block_label].scope
1508- # tmp = exit_scope.make_temp(loc=func_ir.loc)
1509- # region_blocks[exit_block_label].append(ir.Assign(value=ir.Const(0, func_ir.loc), target=tmp, loc=func_ir.loc))
1510- # region_blocks[exit_block_label].append(ir.Return(value=tmp, loc=func_ir.loc))
15111484
15121485 target_args = []
15131486 outline_arg_typs = []
1514- # outline_arg_typs = [None] * len(target_args_unordered)
15151487 for tag in self .tags :
15161488 if DEBUG_OPENMP >= 1 :
15171489 print (1 , "target_arg?" , tag , tag .non_arg , is_target_arg (tag .name ))
@@ -1528,7 +1500,6 @@ def add_mapped_to_ins(ins, tags):
15281500 if DEBUG_OPENMP >= 1 :
15291501 print (1 , "found cpointer target_arg" , tag , atyp , id (atyp ))
15301502 else :
1531- # outline_arg_typs[target_arg_index] = atyp
15321503 outline_arg_typs .append (atyp )
15331504 if DEBUG_OPENMP >= 1 :
15341505 print (1 , "found target_arg" , tag , atyp , id (atyp ))
@@ -1647,7 +1618,6 @@ def prepend_device_to_func_name(outlined_ir):
16471618 for k in targetctx .__dict__ .keys () - {"call_conv" }
16481619 }
16491620 )
1650- # subtarget.install_registry(imputils.builtin_registry)
16511621 # Turn off the Numba runtime (incref and decref mostly) for the target compilation.
16521622 subtarget .enable_nrt = False
16531623 typingctx_outlined = targetctx .typing_context
@@ -1677,7 +1647,6 @@ def prepend_device_to_func_name(outlined_ir):
16771647 device_func_name , typingctx_outlined
16781648 )
16791649 device_target .fndesc = fndesc
1680- # device_target = cuda_descriptor.cuda_target.target_context
16811650
16821651 device_lowerer_pipeline = OnlyLowerCUDA
16831652 openmp_cuda_target = numba_cuda .descriptor .CUDATarget ("openmp_cuda" )
@@ -1851,58 +1820,22 @@ def prepend_device_to_func_name(outlined_ir):
18511820 f"Unsupported OpenMP device number { selected_device } , type { device_type } , vendor { device_vendor } , arch { get_device_arch (selected_device )} "
18521821 )
18531822
1854- # if cuda then run ptxas on the cres and pass that
1855-
1856- # bytes_array_typ = lir.ArrayType(cgutils.voidptr_t, len(target_elf))
1857- # bytes_array_typ = lir.ArrayType(cgutils.int8_t, len(target_elf))
1858- # dev_image = cgutils.add_global_variable(mod, bytes_array_typ, ".omp_offloading.device_image")
1859- # dev_image.initializer = lir.Constant.array(cgutils.int8_t, target_elf)
1860- # dev_image.initializer = lir.Constant.array(cgutils.int8_t, target_elf)
1861- add_target_globals_in_numba = int (
1862- os .environ .get ("NUMBA_OPENMP_ADD_TARGET_GLOBALS" , 0 )
1863- )
1864- if add_target_globals_in_numba != 0 :
1865- elftext = cgutils .make_bytearray (target_elf )
1866- dev_image = targetctx .insert_unique_const (
1867- mod , ".omp_offloading.device_image" , elftext
1868- )
1869- mangled_name = cgutils .make_bytearray (
1870- cres .fndesc .mangled_name .encode ("utf-8" ) + b"\x00 "
1871- )
1872- mangled_var = targetctx .insert_unique_const (
1873- mod , ".omp_offloading.entry_name" , mangled_name
1874- )
1875-
1876- llvmused_typ = lir .ArrayType (cgutils .voidptr_t , 2 )
1877- llvmused_gv = cgutils .add_global_variable (
1878- mod , llvmused_typ , "llvm.used"
1879- )
1880- llvmused_syms = [
1881- lir .Constant .bitcast (dev_image , cgutils .voidptr_t ),
1882- lir .Constant .bitcast (mangled_var , cgutils .voidptr_t ),
1883- ]
1884- llvmused_gv .initializer = lir .Constant .array (
1885- cgutils .voidptr_t , llvmused_syms
1886- )
1887- llvmused_gv .linkage = "appending"
1888- else :
1889- host_side_target_tags .append (
1890- openmp_tag (
1891- "QUAL.OMP.TARGET.DEV_FUNC" ,
1892- StringLiteral (cres .fndesc .mangled_name .encode ("utf-8" )),
1893- )
1894- )
1895- host_side_target_tags .append (
1896- openmp_tag ("QUAL.OMP.TARGET.ELF" , StringLiteral (target_elf ))
1823+ host_side_target_tags .append (
1824+ openmp_tag (
1825+ "QUAL.OMP.TARGET.DEV_FUNC" ,
1826+ StringLiteral (cres .fndesc .mangled_name .encode ("utf-8" )),
18971827 )
1828+ )
1829+ host_side_target_tags .append (
1830+ openmp_tag ("QUAL.OMP.TARGET.ELF" , StringLiteral (target_elf ))
1831+ )
18981832
18991833 if DEBUG_OPENMP >= 1 :
19001834 dprint_func_ir (func_ir , "target after outline compiled func_ir" )
19011835
19021836 llvm_token_t = TokenType ()
19031837 fnty = lir .FunctionType (llvm_token_t , [])
19041838 tags_to_include = self .tags + host_side_target_tags
1905- # tags_to_include = list(filter(lambda x: x.name != "DIR.OMP.TARGET", tags_to_include))
19061839 self .filtered_tag_length = len (tags_to_include )
19071840 if DEBUG_OPENMP >= 1 :
19081841 print ("filtered_tag_length:" , self .filtered_tag_length )
@@ -1986,9 +1919,6 @@ def lower(self, lowerer):
19861919 # which only happens if tag length > 0.
19871920 pop_alloca_callback (lowerer , builder )
19881921
1989- # Process the accumulated allocas in the start region.
1990- self .start_region .process_alloca_queue ()
1991-
19921922 assert self .start_region .omp_region_var is not None
19931923 if DEBUG_OPENMP >= 2 :
19941924 print (
@@ -2060,6 +1990,7 @@ def openmp_region_end_defs(region, use_set=None, def_set=None):
20601990ir_extension_usedefs [openmp_region_end ] = openmp_region_end_defs
20611991
20621992
# Callbacks for type inference extensions for openmp region start and end.
def openmp_region_start_infer(prs, typeinferer):
    """Type-inference callback for an OpenMP region start; does nothing.

    Args:
        prs: Ignored.
        typeinferer: Ignored.
    """
    pass
20651996
0 commit comments