Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NVIDIA driver 595.45.04 with P2P for 4090 and 5090
# NVIDIA driver 595.58.03 with P2P for 4090 and 5090

This allows using P2P on 4090 and 5090 GPUs with the 595.58.03 driver version.
See https://github.com/tinygrad/open-gpu-kernel-modules (various branches) for more info.
Expand All @@ -17,7 +17,7 @@ Note that this is very dangerous if you run untrusted software or devices.
1) Edit `/etc/default/grub`
2) Add `amd_iommu=on iommu=pt` to `GRUB_CMDLINE_LINUX_DEFAULT`
3) Run `sudo update-grub`
2) Install https://www.nvidia.com/en-us/drivers/details/265309/
2) Install https://www.nvidia.com/en-us/drivers/details/265902/
3) Run `./install.sh` in this repo
4) Reboot the server

Expand Down
10 changes: 10 additions & 0 deletions install-cachy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Build and install the NVIDIA open kernel modules with the LLVM toolchain
# (clang, ld.lld, llvm-ar, llvm-objcopy), refresh the module dependency
# information and finally run nvidia-smi.
#
# NOTE: the shebang above must stay on the very first line of the file;
# a leading blank line prevents the kernel from honouring it.

export IGNORE_CC_MISMATCH=1

# Arguments shared by the build step and the install step.
MAKE_ARGS=(
    CC=clang
    LD=ld.lld
    AR=llvm-ar
    CXX=clang++
    OBJCOPY=llvm-objcopy
    NV_VERBOSE=1
)
JOBS="$(nproc)"

# Unload the currently loaded modules. This deliberately runs before
# `set -e`, so a failure here (presumably when the modules are not loaded)
# does not abort the script.
sudo rmmod nvidia_drm nvidia_modeset nvidia_uvm nvidia

# From here on, stop at the first failing command.
set -e
make modules -j"$JOBS" "${MAKE_ARGS[@]}"
sudo make modules_install -j"$JOBS" "${MAKE_ARGS[@]}"
sudo depmod
nvidia-smi
2 changes: 1 addition & 1 deletion kernel-open/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
ccflags-y += -I$(src)
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
ccflags-y += -DNV_VERSION_STRING=\"595.45.04\"
ccflags-y += -DNV_VERSION_STRING=\"595.58.03\"

# Include and link Tegra out-of-tree modules.
ifneq ($(wildcard /usr/src/nvidia/nvidia-public),)
Expand Down
3 changes: 1 addition & 2 deletions kernel-open/common/inc/nv-linux.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -1353,7 +1353,6 @@ typedef struct nv_linux_state_s {
struct reset_control *nvdisplay_reset;
struct reset_control *dsi_core_reset;
struct reset_control *mipi_cal_reset;
struct reset_control *hdacodec_reset;

/*
* nv_imp_icc_path represents the interconnect path across which display
Expand Down
10 changes: 10 additions & 0 deletions kernel-open/common/inc/nv-mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ static inline void nv_vma_flags_clear_word(struct vm_area_struct *vma, unsigned
ACCESS_PRIVATE(vma, __vm_flags) &= ~flags;
#endif
}

/*
 * Compatibility wrapper around the kernel's __is_vma_write_locked().
 *
 * When NV_IS_VMA_WRITE_LOCKED_HAS_MM_LOCK_SEQ_ARG is defined, both arguments
 * are forwarded to __is_vma_write_locked(). Otherwise *mm_lock_seq is filled
 * in from __vma_raw_mm_seqnum(vma) and the single-argument form of
 * __is_vma_write_locked() is called.
 *
 * Returns the value returned by __is_vma_write_locked().
 */
static inline int nv_is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
#if !defined(NV_IS_VMA_WRITE_LOCKED_HAS_MM_LOCK_SEQ_ARG)
    /* Single-argument kernel API: fetch the sequence number separately. */
    *mm_lock_seq = __vma_raw_mm_seqnum(vma);
    return __is_vma_write_locked(vma);
#else
    /* Two-argument kernel API: it receives the mm_lock_seq pointer directly. */
    return __is_vma_write_locked(vma, mm_lock_seq);
#endif
}
#endif // !NV_CAN_CALL_VMA_START_WRITE

static inline void nv_vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
Expand Down
10 changes: 10 additions & 0 deletions kernel-open/common/inc/nvkms-kapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,17 @@ struct NvKmsKapiGpuInfo {
MIGDeviceId migDevice;
};

/*
* Linux kernel options CONFIG_RANDSTRUCT_* randomize structs that are composed
* entirely of function pointers, but can only control struct layout for sources
* built by kbuild. NvKmsKapiCallbacks is shared between kbuild-built
* nvidia-drm.ko, and the "OS-agnostic" portions of nvidia-modeset.ko (not built
* by kbuild). Add a _padding member to disable struct randomization.
*
* Refer to https://github.com/NVIDIA/open-gpu-kernel-modules/issues/1033
*/
struct NvKmsKapiCallbacks {
int _padding;
void (*suspendResume)(NvBool suspend);
void (*remove)(NvU32 gpuId);
void (*probe)(const struct NvKmsKapiGpuInfo *gpu_info);
Expand Down
12 changes: 11 additions & 1 deletion kernel-open/common/inc/nvtypes.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -514,6 +514,16 @@ typedef struct
#define NV_DECLARE_ALIGNED(TYPE_VAR, ALIGN) __declspec(align(ALIGN)) TYPE_VAR
#elif defined(__arm)
#define NV_DECLARE_ALIGNED(TYPE_VAR, ALIGN) __align(ALIGN) TYPE_VAR
#endif

/*
 * NV_ALIGNOF(T) evaluates to the alignment requirement of type T, using the
 * first spelling the compiler is known to support:
 *   - C11 or newer:    _Alignof
 *   - C++11 or newer:  alignof
 *   - GCC / Clang:     __alignof__
 *   - MSVC:            __alignof
 * The macro is left undefined when none of these conditions hold.
 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
#define NV_ALIGNOF(T) _Alignof(T)
#elif defined(__cplusplus) && (__cplusplus >= 201103L)
#define NV_ALIGNOF(T) alignof(T)
#elif defined(__GNUC__) || defined(__clang__)
#define NV_ALIGNOF(T) __alignof__(T)
#elif defined(_MSC_VER)
#define NV_ALIGNOF(T) __alignof(T)
#endif

/***************************************************************************\
Expand Down
20 changes: 20 additions & 0 deletions kernel-open/conftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5117,6 +5117,26 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_CRTC_FUNCS_HAS_GET_VBLANK_TIMESTAMP" "" "types"
;;

is_vma_write_locked_has_mm_lock_seq_arg)
#
# Determine if __is_vma_write_locked() takes an
# 'unsigned int *mm_lock_seq' argument in addition to the
# 'struct vm_area_struct *' argument.
#
# The test code below uses the two-argument form, so it is expected
# to compile only on kernels that still have the extra parameter.
#
# Commit 22f7639f2f03 ("mm/vma: improve and document
# __is_vma_write_locked()") removed the 'unsigned int *mm_lock_seq'
# parameter in v7.0-rc1.
#
CODE="
#include <linux/mm.h>
#include <linux/mmap_lock.h>
int conftest_is_vma_write_locked_has_mm_lock_seq_arg(struct vm_area_struct *vma) {
unsigned int mm_lock_seq;
return __is_vma_write_locked(vma, &mm_lock_seq);
}"

compile_check_conftest "$CODE" "NV_IS_VMA_WRITE_LOCKED_HAS_MM_LOCK_SEQ_ARG" "" "types"
;;

# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist
Expand Down
8 changes: 8 additions & 0 deletions kernel-open/nvidia-drm/nvidia-drm-gem-nvkms-memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,14 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
allocParams.compressible = &p->compressible;

pMemory = nvKms->allocateMemory(nv_dev->pDevice, &allocParams);
if (pMemory == NULL && (p->flags & NV_GEM_ALLOC_NO_SCANOUT)) {
NV_DRM_DEV_LOG_INFO(
nv_dev,
"Failed to allocate NVKMS video memory for GEM object, trying to fall back to sysmem");
allocParams.useVideoMemory = false;
pMemory = nvKms->allocateMemory(nv_dev->pDevice, &allocParams);
}

if (pMemory == NULL) {
ret = -EINVAL;
NV_DRM_DEV_LOG_ERR(nv_dev,
Expand Down
19 changes: 17 additions & 2 deletions kernel-open/nvidia-uvm/uvm_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -2921,7 +2921,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
#if UVM_IS_CONFIG_HMM()
// HMM was disabled when first initialising the parent so we can't support
// it now. Tell the caller to retry with it disabled.
else if (!parent_gpu->devmem && enable_hmm) {
else if (!parent_gpu->devmem && enable_hmm && uvm_hmm_is_enabled_system_wide()) {
status = NV_ERR_BUSY_RETRY;
goto error;
}
Expand Down Expand Up @@ -3222,10 +3222,25 @@ uvm_gpu_phys_address_t uvm_gpu_peer_phys_address(uvm_gpu_t *owning_gpu, NvU64 ad
uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address, uvm_gpu_t *accessing_gpu)
{
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
const bool mig_peers_use_phys = uvm_gpus_are_smc_peers(owning_gpu, accessing_gpu) &&
accessing_gpu->parent->ce_phys_vidmem_write_supported;

if (accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)
// MIG peers do not create peer vidmem mappings like other peers. They do
// create their vidmem identity mappings to cover all possible physical
// addresses, even those of other MIG peers.
// Use this vidmem identity mapping if CEs need to use virtual addresses.
if (uvm_gpus_are_smc_peers(owning_gpu, accessing_gpu) && !mig_peers_use_phys) {
uvm_gpu_phys_address_t phys_address = uvm_gpu_peer_phys_address(owning_gpu, address, accessing_gpu);
return uvm_gpu_address_virtual_from_vidmem_phys(accessing_gpu, phys_address.address);
}

// Use physical addresses for MIG peers if the CE allows it, irrespective of
// the peer copy mode.
if (accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL || mig_peers_use_phys)
return uvm_gpu_address_from_phys(uvm_gpu_peer_phys_address(owning_gpu, address, accessing_gpu));

// MIG peers do not create peer GPU mappings so it should never reach here.
UVM_ASSERT(!uvm_gpus_are_smc_peers(owning_gpu, accessing_gpu));
UVM_ASSERT(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL);
gpu_peer_mapping = uvm_gpu_get_peer_mapping(accessing_gpu, owning_gpu->id);

Expand Down
5 changes: 4 additions & 1 deletion kernel-open/nvidia-uvm/uvm_hmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,10 @@ static void fill_dst_pfn(uvm_va_block_t *va_block,

UVM_ASSERT(!page_count(dpage));
UVM_ASSERT(!dpage->zone_device_data);
nv_zone_device_page_init(dpage);


zone_device_page_init(dpage, 0, 0);

dpage->zone_device_data = gpu_chunk;
atomic64_inc(&va_block->hmm.va_space->hmm.allocated_page_count);
}
Expand Down
5 changes: 5 additions & 0 deletions kernel-open/nvidia-uvm/uvm_pmm_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@
#include "uvm_test.h"
#include "uvm_linux.h"

#include <linux/version.h>

#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
#include <linux/pci-p2pdma.h>
#endif
Expand Down Expand Up @@ -3019,6 +3021,7 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
return ret;
}


static void devmem_page_free(struct page *page)
{
uvm_gpu_chunk_t *chunk = uvm_pmm_devmem_page_to_chunk(page);
Expand Down Expand Up @@ -3271,6 +3274,7 @@ static void device_p2p_page_free_wake(struct nv_kref *ref)
wake_up(&p2p_mem->waitq);
}


static void device_p2p_page_free(struct page *page)
{
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
Expand All @@ -3286,6 +3290,7 @@ static void device_p2p_folio_free(struct folio *folio)
#endif

#if UVM_CDMM_PAGES_SUPPORTED()

static void device_coherent_page_free(struct page *page)
{
device_p2p_page_free(page);
Expand Down
17 changes: 12 additions & 5 deletions kernel-open/nvidia/nv-mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,15 +957,22 @@ void NV_API_CALL nv_set_safe_to_mmap_locked(
}

#if !NV_CAN_CALL_VMA_START_WRITE

/*
 * NV_VMA_LOCK_OFFSET is the value added to and subtracted from
 * vma->vm_refcnt by the VMA write-lock helpers in this file. It resolves to
 * VM_REFCNT_EXCLUDE_READERS_FLAG when the kernel headers define that macro
 * and to VMA_LOCK_OFFSET otherwise.
 * NOTE(review): presumably newer kernels renamed VMA_LOCK_OFFSET; confirm.
 */
#if !defined(VM_REFCNT_EXCLUDE_READERS_FLAG)
#define NV_VMA_LOCK_OFFSET VMA_LOCK_OFFSET
#else
#define NV_VMA_LOCK_OFFSET VM_REFCNT_EXCLUDE_READERS_FLAG
#endif

static NvBool nv_vma_enter_locked(struct vm_area_struct *vma, NvBool detaching)
{
NvU32 tgt_refcnt = VMA_LOCK_OFFSET;
NvU32 tgt_refcnt = NV_VMA_LOCK_OFFSET;
NvBool interrupted = NV_FALSE;
if (!detaching)
{
tgt_refcnt++;
}
if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt))
if (!refcount_add_not_zero(NV_VMA_LOCK_OFFSET, &vma->vm_refcnt))
{
return NV_FALSE;
}
Expand Down Expand Up @@ -995,7 +1002,7 @@ static NvBool nv_vma_enter_locked(struct vm_area_struct *vma, NvBool detaching)
if (interrupted)
{
// Clean up on error: release refcount and dep_map
refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt);
refcount_sub_and_test(NV_VMA_LOCK_OFFSET, &vma->vm_refcnt);
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
return NV_FALSE;
}
Expand All @@ -1011,7 +1018,7 @@ void nv_vma_start_write(struct vm_area_struct *vma)
{
NvU32 mm_lock_seq;
NvBool locked;
if (__is_vma_write_locked(vma, &mm_lock_seq))
if (nv_is_vma_write_locked(vma, &mm_lock_seq))
return;

locked = nv_vma_enter_locked(vma, NV_FALSE);
Expand All @@ -1020,7 +1027,7 @@ void nv_vma_start_write(struct vm_area_struct *vma)
if (locked)
{
NvBool detached;
detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt);
detached = refcount_sub_and_test(NV_VMA_LOCK_OFFSET, &vma->vm_refcnt);
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
WARN_ON_ONCE(detached);
}
Expand Down
42 changes: 1 addition & 41 deletions kernel-open/nvidia/nv-platform.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -497,26 +497,6 @@ NV_STATUS NV_API_CALL nv_soc_device_reset(nv_state_t *nv)
goto out;
}
}

if (nvl->hdacodec_reset != NULL)
{
/*
* HDACODEC reset control is shared between display driver and audio driver.
* Since reset_control_reset toggles the reset signal, we prefer to use
* reset_control_deassert. Additionally, since Audio driver uses
* reset_control_bulk_deassert() which internally calls reset_control_deassert,
* we must use reset_control_deassert, because consumers must not use
* reset_control_reset on shared reset lines when reset_control_deassert has
* been used.
*/
rc = reset_control_deassert(nvl->hdacodec_reset);
if (rc != 0)
{
status = NV_ERR_GENERIC;
nv_printf(NV_DBG_ERRORS, "NVRM: hdacodec reset_control_deassert failed, rc: %d\n", rc);
goto out;
}
}
}
out:
return status;
Expand Down Expand Up @@ -1078,26 +1058,6 @@ static int nv_platform_device_display_probe(struct platform_device *plat_dev)
nv_printf(NV_DBG_ERRORS, "NVRM: mipi_cal devm_reset_control_get failed, err: %ld\n", PTR_ERR(nvl->mipi_cal_reset));
nvl->mipi_cal_reset = NULL;
}

/*
* In T23x, HDACODEC is part of the same power domain as NVDisplay, so
* unpowergating the DISP domain also results in the HDACODEC reset
* being de-asserted. However, in T26x, HDACODEC is being moved
* out to a separate always-on domain, so we need to explicitly de-assert
* the HDACODEC reset in RM. We don't have good way to differentiate
* between T23x vs T264x at this place. So if there is failure to read
* "hdacodec_reset" from DT silently ignore it for now. In long term we
* should really look into using the devm_reset_control_bulk* APIs and
* see if this is feasible if we're ultimately just getting and
* asserting/deasserting all of the resets specified in DT together all of
* the time, and if there's no scenarios in which we need to only use a
* specific set of reset(s) at a given point.
*/
nvl->hdacodec_reset = devm_reset_control_get(nvl->dev, "hdacodec_reset");
if (IS_ERR(nvl->hdacodec_reset))
{
nvl->hdacodec_reset = NULL;
}
}

status = nv_imp_icc_get(nv);
Expand Down
1 change: 1 addition & 0 deletions kernel-open/nvidia/nvidia.Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += has_enum_pidtype_tgid
NV_CONFTEST_TYPE_COMPILE_TESTS += bpmp_mrq_has_strap_set
NV_CONFTEST_TYPE_COMPILE_TESTS += register_shrinker_has_format_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_resize_resource_has_exclude_bars_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += is_vma_write_locked_has_mm_lock_seq_arg

NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build
Expand Down
3 changes: 2 additions & 1 deletion src/common/displayport/inc/dp_connectorimpl2x.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -110,6 +110,7 @@ namespace DisplayPort
bool bStuffDummySymbolsFor128b132b;
bool bStuffDummySymbolsFor8b10b;
bool bDisableWatermarkCaching;
bool bEnableClearMSAWhenNotUsed;

// Do not enable downspread while link training.
bool bDisableDownspread;
Expand Down
4 changes: 4 additions & 0 deletions src/common/displayport/inc/dp_regkeydatabase.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@

#define NV_DP_REGKEY_DISABLE_NATIVE_DISPLAYID2X_SUPPORT "DISABLE_NATIVE_DISPLAYID2X_SUPPORT"
#define NV_DP_REGKEY_FORCE_NLPIGNORE_DDS "DP_FORCE_NLPIGNORE_DDS"

#define NV_DP_REGKEY_ENABLE_CLEAR_MSA_WHEN_NOT_USED "DP_ENABLE_CLEAR_MSA_WHEN_NOT_USED"

//
// Data Base used to store all the regkey values.
// The actual data base is declared statically in dp_evoadapter.cpp.
Expand Down Expand Up @@ -169,6 +172,7 @@ struct DP_REGKEY_DATABASE
bool bDisableNativeDisplayId2xSupport;
bool bUseMaxDSCCompressionMST;
bool bIgnoreUnplugUnlessRequested;
bool bEnableClearMSAWhenNotUsed;
};

extern struct DP_REGKEY_DATABASE dpRegkeyDatabase;
Expand Down
Loading