Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions kernel-open/common/inc/nv-linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -1610,6 +1610,7 @@ extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_RegisterPlatformDeviceDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_Bar1P2PMode;
extern NvU32 NVreg_TegraGpuPgMask;
extern NvU32 NVreg_EnableNonblockingOpen;
extern NvU32 NVreg_UseKernelSuspendNotifiers;
Expand Down
3 changes: 1 addition & 2 deletions kernel-open/nvidia-uvm/uvm_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -1824,8 +1824,7 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)

// Returns true when the parent GPU's system bus memory window is non-empty,
// i.e. memory_window_end lies strictly above memory_window_start. An empty or
// unset window (end <= start) yields false.
//
// The previous unconditional `return true;` made this comparison unreachable
// and reported every GPU as coherent regardless of its memory window.
static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
{
    return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}

static bool uvm_parent_gpu_supports_ats(const uvm_parent_gpu_t *parent_gpu)
Expand Down
5 changes: 3 additions & 2 deletions kernel-open/nvidia/nv-pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,12 @@ static int nv_resize_pcie_bars(struct pci_dev *pci_dev) {
struct pci_host_bridge *host;
#endif

/*if (NVreg_EnableResizableBar == 0)
if ((NVreg_EnableResizableBar == 0) &&
(NVreg_Bar1P2PMode == NV_BAR1_P2P_MODE_DISABLE))
{
nv_printf(NV_DBG_INFO, "NVRM: resizable BAR disabled by regkey, skipping\n");
return 0;
}*/
}

// Check if BAR1 has PCIe rebar capabilities
sizes = pci_rebar_get_possible_sizes(pci_dev, NV_GPU_BAR1);
Expand Down
20 changes: 20 additions & 0 deletions kernel-open/nvidia/nv-reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,24 @@
#define __NV_ENABLE_RESIZABLE_BAR EnableResizableBar
#define NV_REG_ENABLE_RESIZABLE_BAR NV_REG_STRING(__NV_ENABLE_RESIZABLE_BAR)

/*
* Option: Bar1P2PMode
*
* Description:
*
* Set BAR1 P2P policy.
*
* Possible values:
* 0 - disable: do not use BAR1 based P2P
* 1 - enable: prefer NVLink/C2C and use BAR1 PCIe as fallback (default)
* 2 - pcie: use only PCIe BAR1 P2P even if NVLink/C2C is available
*/
#define __NV_BAR1_P2P_MODE Bar1P2PMode
#define NV_REG_BAR1_P2P_MODE NV_REG_STRING(__NV_BAR1_P2P_MODE)
#define NV_BAR1_P2P_MODE_DISABLE 0
#define NV_BAR1_P2P_MODE_ENABLE 1
#define NV_BAR1_P2P_MODE_PCIE 2

/*
* Option: EnableGpuFirmware
*
Expand Down Expand Up @@ -1066,6 +1084,7 @@ NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_PCIE_RELAXED_ORDERING_MODE, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_REGISTER_PCI_DRIVER, 1);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_REGISTER_PLATFORM_DEVICE_DRIVER, 1);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_RESIZABLE_BAR, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_BAR1_P2P_MODE, NV_BAR1_P2P_MODE_ENABLE);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_DBG_BREAKPOINT, 0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_TEGRA_GPU_PG_MASK, ~0);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_ENABLE_NONBLOCKING_OPEN, 1);
Expand Down Expand Up @@ -1130,6 +1149,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_REGISTER_PCI_DRIVER),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_PCIE_RELAXED_ORDERING_MODE),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_RESIZABLE_BAR),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_BAR1_P2P_MODE),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_GPU_FIRMWARE),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_GPU_FIRMWARE_LOGS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_RM_NVLINK_BW_LINK_COUNT),
Expand Down
33 changes: 33 additions & 0 deletions kernel-open/nvidia/os-registry.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,33 @@ NvBool nv_is_uuid_in_gpu_exclusion_list(const char *uuid)
return NV_FALSE;
}

/*
 * Translate the NVreg_Bar1P2PMode module parameter into RM registry
 * overrides.
 *
 *   NV_BAR1_P2P_MODE_DISABLE - writes nothing; RM keeps its own defaults.
 *   NV_BAR1_P2P_MODE_ENABLE  - writes ForceP2P=0x11, RMForceP2PType=0x0,
 *                              RMPcieP2PType=0x1.
 *   NV_BAR1_P2P_MODE_PCIE    - same as ENABLE except RMForceP2PType=0x1.
 *
 * Any other value is rejected with a warning and normalized to
 * NV_BAR1_P2P_MODE_DISABLE, so that every later reader of
 * NVreg_Bar1P2PMode (for example a comparison against
 * NV_BAR1_P2P_MODE_DISABLE) agrees with the "using disable mode" message
 * instead of treating the out-of-range value as "not disabled".
 *
 * NOTE(review): the numeric values written to the RM keys are opaque here;
 * confirm their meaning against the RM registry key definitions.
 *
 * sp: stack handle passed through to rm_write_registry_dword().
 */
static void nv_apply_bar1_p2p_mode(nvidia_stack_t *sp)
{
    switch (NVreg_Bar1P2PMode)
    {
        case NV_BAR1_P2P_MODE_DISABLE:
            break;

        case NV_BAR1_P2P_MODE_ENABLE:
            rm_write_registry_dword(sp, NULL, "ForceP2P", 0x11);
            rm_write_registry_dword(sp, NULL, "RMForceP2PType", 0x0);
            rm_write_registry_dword(sp, NULL, "RMPcieP2PType", 0x1);
            break;

        case NV_BAR1_P2P_MODE_PCIE:
            rm_write_registry_dword(sp, NULL, "ForceP2P", 0x11);
            rm_write_registry_dword(sp, NULL, "RMForceP2PType", 0x1);
            rm_write_registry_dword(sp, NULL, "RMPcieP2PType", 0x1);
            break;

        default:
            nv_printf(NV_DBG_WARNINGS,
                      "NVRM: Invalid NVreg_Bar1P2PMode=%u, using disable mode\n",
                      NVreg_Bar1P2PMode);
            /* Make the fallback real: do not leave the invalid value behind. */
            NVreg_Bar1P2PMode = NV_BAR1_P2P_MODE_DISABLE;
            break;
    }
}

NV_STATUS NV_API_CALL os_registry_init(void)
{
nv_parm_t *entry;
Expand Down Expand Up @@ -343,6 +370,12 @@ NV_STATUS NV_API_CALL os_registry_init(void)
}
}

/*
* Apply module-level BAR1 P2P policy before RegistryDwords parsing so
* explicit RegistryDwords can override this policy if needed.
*/
nv_apply_bar1_p2p_mode(sp);

rm_parse_option_string(sp, NVreg_RegistryDwords);

for (i = 0; (entry = &nv_parms[i])->name != NULL; i++)
Expand Down
6 changes: 3 additions & 3 deletions src/nvidia/src/kernel/gpu/bif/kernel_bif.c
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ _kbifInitRegistryOverrides
NvU32 data32;

// P2P Override
pKernelBif->p2pOverride = 0x11;
pKernelBif->p2pOverride = BIF_P2P_NOT_OVERRIDEN;
if (osReadRegistryDword(pGpu, NV_REG_STR_CL_FORCE_P2P, &data32) == NV_OK)
{
pKernelBif->p2pOverride = data32;
Expand All @@ -1168,14 +1168,14 @@ _kbifInitRegistryOverrides
}

// P2P force type override
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_PCIEP2P;
pKernelBif->forceP2PType = NV_REG_STR_RM_FORCE_P2P_TYPE_DEFAULT;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FORCE_P2P_TYPE, &data32) == NV_OK &&
(data32 <= NV_REG_STR_RM_FORCE_P2P_TYPE_MAX))
{
pKernelBif->forceP2PType = data32;
}

pKernelBif->pcieP2PType = NV_REG_STR_RM_PCIEP2P_TYPE_BAR1;
pKernelBif->pcieP2PType = NV_REG_STR_RM_PCIEP2P_TYPE_DEFAULT;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_PCIEP2P_TYPE, &data32) == NV_OK)
{
pKernelBif->pcieP2PType = data32;
Expand Down
27 changes: 16 additions & 11 deletions src/nvidia/src/kernel/rmapi/nv_gpu_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -4030,13 +4030,13 @@ nvGpuOpsBuildExternalAllocPtes
NvU32 ptePcfHw = 0;

nvFieldSetBool(&pPteFmt->fldValid, NV_TRUE, pte.v8);
// gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
if (aperture == GMMU_APERTURE_PEER) {
if ((aperture == GMMU_APERTURE_PEER) && isBar1P2PSupported)
{
gmmuFieldSetAperture(&pPteFmt->fldAperture, GMMU_APERTURE_SYS_COH, pte.v8);
} else {
gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
}
nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);
nvFieldSet32(&pPteFmt->fldKind, kind, pte.v8);

ptePcfSw |= vol ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0;
ptePcfSw |= readOnly ? (1 << SW_MMU_PCF_RO_IDX) : 0;
Expand Down Expand Up @@ -4079,9 +4079,12 @@ nvGpuOpsBuildExternalAllocPtes
if (nvFieldIsValid32(&pPteFmt->fldAtomicDisable.desc))
nvFieldSetBool(&pPteFmt->fldAtomicDisable, !atomic, pte.v8);

if (aperture == GMMU_APERTURE_PEER) {
if ((aperture == GMMU_APERTURE_PEER) && isBar1P2PSupported)
{
gmmuFieldSetAperture(&pPteFmt->fldAperture, GMMU_APERTURE_SYS_NONCOH, pte.v8);
} else {
}
else
{
gmmuFieldSetAperture(&pPteFmt->fldAperture, aperture, pte.v8);
}

Expand All @@ -4094,11 +4097,12 @@ nvGpuOpsBuildExternalAllocPtes
}
}

if (aperture == GMMU_APERTURE_PEER) {
if ((aperture == GMMU_APERTURE_PEER) && isBar1P2PSupported)
{
fabricBaseAddress = bar1BusAddr;
}

/*if (aperture == GMMU_APERTURE_PEER)
if ((aperture == GMMU_APERTURE_PEER) && !isBar1P2PSupported)
{
nvFieldSet32(&pPteFmt->fldPeerIndex, peerId, pte.v8);

Expand Down Expand Up @@ -4158,7 +4162,7 @@ nvGpuOpsBuildExternalAllocPtes
}
}
}
}*/
}

//
// Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
Expand Down Expand Up @@ -4435,11 +4439,12 @@ nvGpuOpsBuildExternalAllocPhysAddrs
return NV_ERR_BUFFER_TOO_SMALL;


if (aperture == GMMU_APERTURE_PEER) {
if ((aperture == GMMU_APERTURE_PEER) && isBar1P2PSupported)
{
fabricBaseAddress = bar1BusAddr;
}

/*if (aperture == GMMU_APERTURE_PEER)
if ((aperture == GMMU_APERTURE_PEER) && !isBar1P2PSupported)
{
//
// Any fabric memory descriptors are pre-encoded with the fabric base address
Expand Down Expand Up @@ -4497,7 +4502,7 @@ nvGpuOpsBuildExternalAllocPhysAddrs
}
}
}
}*/
}

//
// Both memdescGetPhysAddr() and kgmmuEncodePhysAddr() have pretty high overhead.
Expand Down
3 changes: 2 additions & 1 deletion src/nvidia/src/libraries/mmu/gmmu_fmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,9 @@ gmmuFmtPtePhysAddrFld
{
case GMMU_APERTURE_SYS_COH:
case GMMU_APERTURE_SYS_NONCOH:
case GMMU_APERTURE_PEER:
return &pPte->fldAddrSysmem;
case GMMU_APERTURE_PEER:
return &pPte->fldAddrPeer;
case GMMU_APERTURE_VIDEO:
return &pPte->fldAddrVidmem;
default:
Expand Down