-
Notifications
You must be signed in to change notification settings - Fork 268
Open
Description
Hello
hwloc crashes with recent releases of level-zero and compute-runtime. I don't know exactly when it started, because testing different releases is complicated: the official Debian/Ubuntu ZE packages (mostly the level-zero package names) are incompatible with the deb packages you distribute.
Anyway, here's a small reproducer extracted from hwloc. It simply lists the ZES drivers and devices and calls zesDevicePciGetProperties() on each device:
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
#include <level_zero/ze_api.h>
#include <level_zero/zes_api.h>
int main(void)
{
zes_driver_handle_t *sdrh;
uint32_t nbdrivers, i, k;
ze_result_t res;
printf("testing ZES devices\n");
res = zesInit(0);
if (res != ZE_RESULT_SUCCESS) {
fprintf(stderr, "Failed to initialize LevelZero Sysman in zesInit(): %d\n", (int)res);
return 0;
}
nbdrivers = 0;
res = zesDriverGet(&nbdrivers, NULL);
if (res != ZE_RESULT_SUCCESS || !nbdrivers)
return 0;
sdrh = malloc(nbdrivers * sizeof(*sdrh));
if (!sdrh)
return 0;
res = zesDriverGet(&nbdrivers, sdrh);
if (res != ZE_RESULT_SUCCESS) {
free(sdrh);
return 0;
}
printf("found %u L0 ZES drivers\n", nbdrivers);
k = 0;
for(i=0; i<nbdrivers; i++) {
uint32_t nbdevices, j;
zes_device_handle_t *sdvh;
nbdevices = 0;
res = zesDeviceGet(sdrh[i], &nbdevices, NULL);
if (res != ZE_RESULT_SUCCESS || !nbdevices)
continue;
sdvh = malloc(nbdevices * sizeof(*sdvh));
if (!sdvh)
continue;
res = zesDeviceGet(sdrh[i], &nbdevices, sdvh);
if (res != ZE_RESULT_SUCCESS) {
free(sdvh);
continue;
}
printf("found %u L0 ZES devices in driver #%u\n", nbdevices, i);
for (j=0; j<nbdevices; j++, k++) {
zes_pci_properties_t pci;
res = zesDevicePciGetProperties(sdvh[j], &pci);
if (res != ZE_RESULT_SUCCESS) {
errno = EINVAL;
return -1;
}
printf("got PCI\n");
}
free(sdvh);
}
free(sdrh);
return 0;
}
gdb shows:
testing ZES devices
⚠️ warning: Corrupted shared library list: 0x555557ab6ca0 != 0x555555576310
found 1 L0 ZES drivers
found 1 L0 ZES devices in driver #0
Program received signal SIGSEGV, Segmentation fault.
L0::Sysman::PciImp::pciStaticProperties (this=0x555557ab75b0,
pProperties=0x7fffffffe2b0)
at ../../neo/level_zero/sysman/source/api/pci/sysman_pci_imp.cpp:126
⚠️ warning: 126 ../../neo/level_zero/sysman/source/api/pci/sysman_pci_imp.cpp: Aucun fichier ou dossier de ce nom
The machine is a laptop with 13th Gen Intel(R) Core(TM) i7-1370P running 6.18.12+deb14-amd64
I removed all relevant packages and installed your latest ones (plus the dbgsym ones):
intel-igc-core-2_2.28.4+20760_amd64.deb
intel-ocloc_26.05.37020.3-0_amd64.deb
level-zero-devel_1.27.0+u24.04_amd64.deb
libigdgmm12_22.9.0_amd64.deb
intel-igc-opencl-2_2.28.4+20760_amd64.deb
intel-opencl-icd_26.05.37020.3-0_amd64.deb
level-zero_1.27.0+u24.04_amd64.deb
libze-intel-gpu1_26.05.37020.3-0_amd64.deb
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels