Bernhard Stoeckner 2023-08-08 18:26:28 +02:00
parent 29f830f1bb
commit 12c0739352
85 changed files with 33144 additions and 32566 deletions

View File

@ -2,6 +2,8 @@
## Release 535 Entries
### [535.98] 2023-08-08
### [535.86.10] 2023-07-31
### [535.86.05] 2023-07-18

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.86.10.
version 535.98.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.86.10 driver release. This can be achieved by installing
535.98 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.86.10 release,
(see the table below). However, in the 535.98 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.86.10/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.98/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -829,6 +829,10 @@ Subsystem Device ID.
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA GeForce RTX 4080 | 2704 |
@ -842,6 +846,10 @@ Subsystem Device ID.
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.86.10\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.98\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@ -275,6 +275,7 @@ NV_HEADER_PRESENCE_TESTS = \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
@ -300,7 +301,9 @@ NV_HEADER_PRESENCE_TESTS = \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h
soc/tegra/bpmp.h \
linux/cc_platform.h \
asm/cpufeature.h
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.
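
Each entry in NV_HEADER_PRESENCE_TESTS above yields an NV_<PATH>_H_PRESENT define. As a small usage sketch (the generation rule itself is outside this hunk), the two headers added here are consumed the same way the conftest.sh hunks later in this commit guard their includes:

#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif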

View File

@ -511,7 +511,11 @@ static inline void nv_vfree(void *ptr, NvU64 size)
static inline void *nv_ioremap(NvU64 phys, NvU64 size)
{
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_PRESENT)
void *ptr = ioremap_driver_hardened(phys, size);
#else
void *ptr = ioremap(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
@ -524,11 +528,11 @@ static inline void *nv_ioremap_nocache(NvU64 phys, NvU64 size)
static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_CACHE_PRESENT)
void *ptr = ioremap_cache(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_CACHE_SHARED_PRESENT)
ptr = ioremap_cache_shared(phys, size);
#elif defined(NV_IOREMAP_CACHE_PRESENT)
ptr = ioremap_cache(phys, size);
#elif defined(NVCPU_PPC64LE)
//
// ioremap_cache() has been only implemented correctly for ppc64le with
@ -543,25 +547,32 @@ static inline void *nv_ioremap_cache(NvU64 phys, NvU64 size)
// (commit 40f1ce7fb7e8, kernel 3.0+) and that covers all kernels we
// support on power.
//
void *ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
ptr = ioremap_prot(phys, size, pgprot_val(PAGE_KERNEL));
#else
return nv_ioremap(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
static inline void *nv_ioremap_wc(NvU64 phys, NvU64 size)
{
#if defined(NV_IOREMAP_WC_PRESENT)
void *ptr = ioremap_wc(phys, size);
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
void *ptr = NULL;
#if IS_ENABLED(CONFIG_INTEL_TDX_GUEST) && defined(NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT)
ptr = ioremap_driver_hardened_wc(phys, size);
#elif defined(NV_IOREMAP_WC_PRESENT)
ptr = ioremap_wc(phys, size);
#else
return nv_ioremap_nocache(phys, size);
#endif
if (ptr)
NV_MEMDBG_ADD(ptr, size);
return ptr;
}
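
A brief usage sketch, not part of this change: callers do not need to know which ioremap variant was selected; they only check for NULL and balance the mapping with nv_iounmap() so the NV_MEMDBG accounting above stays consistent. example_touch_regs() below is a hypothetical helper.

static inline int example_touch_regs(NvU64 phys, NvU64 size)
{
    /* Picks ioremap_driver_hardened_wc()/ioremap_wc()/nocache internally. */
    void *regs = nv_ioremap_wc(phys, size);

    if (regs == NULL)
        return -1;

    /* ... access device registers through 'regs' ... */

    /* Pair with nv_iounmap() so the NV_MEMDBG bookkeeping stays balanced. */
    nv_iounmap(regs, size);
    return 0;
}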
static inline void nv_iounmap(void *ptr, NvU64 size)
@ -634,37 +645,24 @@ static NvBool nv_numa_node_has_memory(int node_id)
free_pages(ptr, order); \
}
extern NvU64 nv_shared_gpa_boundary;
static inline pgprot_t nv_sme_clr(pgprot_t prot)
{
#if defined(__sme_clr)
return __pgprot(__sme_clr(pgprot_val(prot)));
#else
return prot;
#endif // __sme_clr
}
static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
/*
* When AMD memory encryption is enabled, device memory mappings with the
* C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
*
* If cc_mkdec() is present, then pgprot_decrypted() can't be used.
*/
#if defined(NV_CC_MKDEC_PRESENT)
if (nv_shared_gpa_boundary != 0)
{
/*
* By design, a VM using vTOM doesn't see the SEV setting and
* for AMD with vTOM, *set* means decrypted.
*/
prot = __pgprot(nv_shared_gpa_boundary | (pgprot_val(vm_prot)));
}
else
{
prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
}
#else
prot = pgprot_decrypted(prot);
#endif
#endif
return prot;
#if defined(pgprot_decrypted)
return pgprot_decrypted(prot);
#else
return nv_sme_clr(prot);
#endif // pgprot_decrypted
}
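
A usage sketch, assuming <linux/mm.h> is already in scope in this header: a hypothetical mmap path would run the user mapping's protection bits through nv_adjust_pgprot() so device memory is mapped decrypted whenever pgprot_decrypted() or __sme_clr is available.

/* Hypothetical illustration only; not part of this commit. */
static inline void example_prepare_user_mapping(struct vm_area_struct *vma)
{
    /* Mark the mapping decrypted (or clear the SME C-bit) before remapping. */
    vma->vm_page_prot = nv_adjust_pgprot(vma->vm_page_prot, 0);
}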
#if defined(PAGE_KERNEL_NOENC)
@ -1324,7 +1322,7 @@ nv_dma_maps_swiotlb(struct device *dev)
* SEV memory encryption") forces SWIOTLB to be enabled when AMD SEV
* is active in all cases.
*/
if (os_sev_enabled)
if (os_cc_enabled)
swiotlb_in_use = NV_TRUE;
#endif

View File

@ -321,10 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
// The next two fields store UVM_BUFFER_LOCATION values
NvU32 gpFifoLoc;
NvU32 gpPutLoc;
// Allocate the channel as secure. This flag should only be set when
// Confidential Compute is enabled.
NvBool secure;
} UvmGpuChannelAllocParams;
typedef struct UvmGpuPagingChannelAllocParams_tag
@ -368,9 +364,6 @@ typedef struct
// True if the CE can be used for P2P transactions
NvBool p2p:1;
// True if the CE supports encryption
NvBool secure:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is

View File

@ -214,8 +214,8 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvU32 os_sev_status;
extern NvBool os_sev_enabled;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
/*

View File

@ -445,6 +445,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@ -467,6 +470,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@ -524,6 +530,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@ -551,6 +560,9 @@ compile_test() {
#if defined(NV_ASM_PGTABLE_TYPES_H_PRESENT)
#include <asm/pgtable_types.h>
#endif
#if defined(NV_ASM_PAGE_H_PRESENT)
#include <asm/page.h>
#endif
#include <asm/set_memory.h>
#else
#include <asm/cacheflush.h>
@ -695,6 +707,50 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
;;
ioremap_driver_hardened)
#
# Determine if the ioremap_driver_hardened() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened(void) {
ioremap_driver_hardened();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_PRESENT" "" "functions"
;;
ioremap_driver_hardened_wc)
#
# Determine if the ioremap_driver_hardened_wc() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_driver_hardened_wc(void) {
ioremap_driver_hardened_wc();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_DRIVER_HARDENED_WC_PRESENT" "" "functions"
;;
ioremap_cache_shared)
#
# Determine if the ioremap_cache_shared() function is present.
# It does not exist on all architectures.
# TODO: Update the commit ID once the API is upstreamed.
#
CODE="
#include <asm/io.h>
void conftest_ioremap_cache_shared(void) {
ioremap_cache_shared();
}"
compile_check_conftest "$CODE" "NV_IOREMAP_CACHE_SHARED_PRESENT" "" "functions"
;;
dom0_kernel_present)
# Add config parameter if running on DOM0.
if [ -n "$VGX_BUILD" ]; then
@ -4888,40 +4944,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_CHANNEL_STATE_PRESENT" "" "types"
;;
pgprot_decrypted)
cc_platform_has)
#
# Determine if the macro 'pgprot_decrypted()' is present.
# Determine if 'cc_platform_has()' is present.
#
# Added by commit 21729f81ce8a ("x86/mm: Provide general kernel
# support for memory encryption") in v4.14 (2017-07-18)
# Added by commit aa5a461171f9 ("x86/sev: Add an x86 version of
# cc_platform_has()") in v5.15.3 (2021-10-04)
CODE="
#include <asm/pgtable.h>
void conftest_pgprot_decrypted(void)
if(pgprot_decrypted()) {}
}"
compile_check_conftest "$CODE" "NV_PGPROT_DECRYPTED_PRESENT" "" "functions"
;;
cc_mkdec)
#
# Determine if cc_mkdec() is present.
#
# cc_mkdec() was added by commit b577f542f93c ("x86/coco: Add API to
# handle encryption mask") in v5.18-rc1 (2022-02-22).
#
CODE="
#if defined(NV_ASM_COCO_H_PRESENT)
#include <asm/coco.h>
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif
void conftest_cc_mkdec(void) {
cc_mkdec();
void conftest_cc_platform_has(void) {
cc_platform_has();
}"
compile_check_conftest "$CODE" "NV_CC_MKDEC_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
;;
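
To tie this conftest to the os_cc_enabled/os_cc_tdx_enabled rename in os-interface.h below, here is a hedged sketch of how the resulting NV_CC_PLATFORM_PRESENT define could be consumed; example_guest_mem_encrypted() is illustrative, not the driver's actual implementation:

#include <linux/types.h>
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif

static bool example_guest_mem_encrypted(void)
{
#if defined(NV_CC_PLATFORM_PRESENT)
    /* True for SEV/SEV-SNP/TDX guests whose memory is encrypted. */
    return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
#else
    return false;
#endif
}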
drm_prime_pages_to_sg_has_drm_device_arg)
@ -6636,8 +6674,8 @@ case "$5" in
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
# On x86_64, vGPU requires MDEV framework to be present.
# On aarch64, vGPU requires vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" != "0" ]) ||
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
exit 0
fi
@ -6649,8 +6687,8 @@ case "$5" in
echo "CONFIG_VFIO_IOMMU_TYPE1";
fi
if [ "$ARCH" = "arm64" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_PCI_CORE";
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
fi
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then

View File

@ -1367,8 +1367,23 @@ static struct drm_driver nv_drm_driver = {
.ioctls = nv_drm_ioctls,
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
/*
* linux-next commit 71a7974ac701 ("drm/prime: Unexport helpers for fd/handle
* conversion") unexports drm_gem_prime_handle_to_fd() and
* drm_gem_prime_fd_to_handle().
*
* Prior linux-next commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
* all drivers") made these helpers the default when .prime_handle_to_fd /
* .prime_fd_to_handle are unspecified, so it's fine to just skip specifying
* them if the helpers aren't present.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
#endif
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_fd_to_handle
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
#endif
.gem_prime_import = nv_drm_gem_prime_import,
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,

View File

@ -54,6 +54,8 @@ NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group

View File

@ -121,6 +121,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
return true;
if (uvm_channel_is_proxy(push->channel)) {
uvm_pushbuffer_t *pushbuffer;
if (dst.is_virtual) {
UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
return false;
@ -142,7 +144,8 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
return false;
}
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
@ -177,10 +180,13 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
uvm_pushbuffer_t *pushbuffer;
if (!uvm_channel_is_proxy(push->channel))
return;
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
pushbuffer = uvm_channel_get_pushbuffer(push->channel);
src->address -= uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
}
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,

View File

@ -760,7 +760,7 @@ static NV_STATUS alloc_vidmem_protected(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t
*mem = NULL;
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
TEST_NV_CHECK_GOTO(zero_vidmem(*mem), err);

View File

@ -272,19 +272,26 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static void unlock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
NvU32 index = uvm_channel_index_in_pool(channel);
NvU32 index;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (!uvm_conf_computing_mode_enabled(gpu))
return;
index = uvm_channel_index_in_pool(channel);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(test_bit(index, channel->pool->push_locks));
__clear_bit(index, channel->pool->push_locks);
uvm_up_out_of_order(&channel->pool->push_sem);
}
}
static bool is_channel_locked_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel))
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu))
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
// For CE and proxy channels, we always return that the channel is locked,
@ -295,25 +302,25 @@ static bool is_channel_locked_for_push(uvm_channel_t *channel)
static void lock_channel_for_push(uvm_channel_t *channel)
{
if (uvm_channel_is_secure(channel)) {
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
__set_bit(index, channel->pool->push_locks);
}
}
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
uvm_channel_pool_assert_locked(channel->pool);
if (uvm_channel_is_secure(channel) &&
!test_bit(index, channel->pool->push_locks) &&
try_claim_channel_locked(channel, num_gpfifo_entries)) {
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
lock_channel_for_push(channel);
return true;
}
@ -321,57 +328,15 @@ static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo
return false;
}
// Reserve a channel in the specified CE pool
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
UVM_ASSERT(pool);
UVM_ASSERT(uvm_channel_pool_is_ce(pool));
uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
}
uvm_spin_loop_init(&spin);
while (1) {
uvm_for_each_channel_in_pool(channel, pool) {
NV_STATUS status;
uvm_channel_update_progress(channel);
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
status = uvm_channel_check_errors(channel);
if (status != NV_OK)
return status;
UVM_SPIN_LOOP(&spin);
}
}
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
return NV_ERR_GENERIC;
}
// Reserve a channel in the specified secure pool
static NV_STATUS channel_reserve_in_secure_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
// Reserve a channel in the specified pool. The channel is locked until the push
// ends
static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
NvU32 index;
UVM_ASSERT(pool);
UVM_ASSERT(pool->secure);
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
@ -426,6 +391,51 @@ done:
return NV_OK;
}
// Reserve a channel in the specified pool
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
uvm_channel_t *channel;
uvm_spin_loop_t spin;
UVM_ASSERT(pool);
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
return channel_reserve_and_lock_in_pool(pool, channel_out);
uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
}
uvm_spin_loop_init(&spin);
while (1) {
uvm_for_each_channel_in_pool(channel, pool) {
NV_STATUS status;
uvm_channel_update_progress(channel);
if (try_claim_channel(channel, 1)) {
*channel_out = channel;
return NV_OK;
}
status = uvm_channel_check_errors(channel);
if (status != NV_OK)
return status;
UVM_SPIN_LOOP(&spin);
}
}
UVM_ASSERT_MSG(0, "Cannot get here?!\n");
return NV_ERR_GENERIC;
}
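
A simplified, self-contained model of the claim step shared by both reservation paths above, with all names hypothetical: a capacity counter is advanced atomically, and a caller that fails to claim keeps polling other channels (the real code does this under the pool lock with uvm_spin_loop_t and channel error checks).

#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
    atomic_uint used_entries;   /* GPFIFO entries currently claimed */
    unsigned num_entries;       /* GPFIFO capacity */
} example_channel_t;

/* Roughly mirrors try_claim_channel(): claim 'count' entries if they fit. */
static bool example_try_claim(example_channel_t *ch, unsigned count)
{
    unsigned cur = atomic_load(&ch->used_entries);

    while (cur + count <= ch->num_entries) {
        if (atomic_compare_exchange_weak(&ch->used_entries, &cur, cur + count))
            return true;        /* claimed; released by the completion path */
    }
    return false;               /* full; caller retries or tries another channel */
}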
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
@ -433,10 +443,7 @@ NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_t
UVM_ASSERT(pool != NULL);
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
@ -452,10 +459,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
if (pool->secure)
return channel_reserve_in_secure_pool(pool, channel_out);
return channel_reserve_in_ce_pool(pool, channel_out);
return channel_reserve_in_pool(pool, channel_out);
}
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
@ -491,7 +495,7 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
return push_info - channel->push_infos;
}
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_t *channel, NvU64 semaphore_va)
static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaphore_va)
{
NvU32 iv_index;
uvm_gpu_address_t notifier_gpu_va;
@ -499,12 +503,14 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, uvm_channel_
uvm_gpu_address_t semaphore_gpu_va;
uvm_gpu_address_t encrypted_payload_gpu_va;
uvm_gpu_t *gpu = push->gpu;
uvm_channel_t *channel = push->channel;
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
UvmCslIv *iv_cpu_addr = semaphore->conf_computing.ivs;
NvU32 payload_size = sizeof(*semaphore->payload);
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
notifier_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.notifier, gpu, false);
@ -538,19 +544,21 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
{
NV_STATUS status;
uvm_channel_manager_t *manager;
uvm_gpu_t *gpu;
UVM_ASSERT(channel);
UVM_ASSERT(push);
manager = channel->pool->manager;
gpu = uvm_channel_get_gpu(channel);
// Only SEC2 and WLC with set up fixed schedule can use direct push
// submission. All other cases (including WLC pre-schedule) need to
// reserve a launch channel that will be used to submit this push
// indirectly.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager)) &&
!uvm_channel_is_sec2(channel)) {
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
@ -559,9 +567,9 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
return status;
}
// For secure channels, channel's lock should have been acquired in
// uvm_channel_reserve() or channel_reserve_in_secure_pool() before
// reaching here.
// When the Confidential Computing feature is enabled, the channel's lock
// should have already been acquired in uvm_channel_reserve() or
// channel_reserve_and_lock_in_pool().
UVM_ASSERT(is_channel_locked_for_push(channel));
push->channel = channel;
@ -586,9 +594,8 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
NvU64 *gpfifo_entry;
NvU64 pushbuffer_va;
uvm_channel_t *channel = push->channel;
uvm_channel_manager_t *channel_manager = channel->pool->manager;
uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
BUILD_BUG_ON(sizeof(*gpfifo_entry) != NVB06F_GP_ENTRY__SIZE);
UVM_ASSERT(!uvm_channel_is_proxy(channel));
@ -644,12 +651,11 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
static void do_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
else if (uvm_channel_is_sec2(push->channel))
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
else
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
gpu->parent->sec2_hal->semaphore_release(push, semaphore_va, new_payload);
}
static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
@ -668,8 +674,8 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
// needs to be scheduled to get an encrypted shadow copy in unprotected
// sysmem. This allows UVM to later decrypt it and observe the new
// semaphore value.
if (uvm_channel_is_secure_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, push->channel, semaphore_va);
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
}
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
@ -746,15 +752,12 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wmb();
// Ring the WLC doorbell to start processing the above push
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset,
wlc_channel->channel_info.workSubmissionToken);
UVM_GPU_WRITE_ONCE(*wlc_channel->channel_info.workSubmissionOffset, wlc_channel->channel_info.workSubmissionToken);
}
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@ -767,7 +770,7 @@ static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
// WLC submissions are done under channel lock, so there should be no
@ -848,8 +851,6 @@ static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);
// Update GPPUT. The update needs 4B write to specific offset,
// however we can only do 16B aligned decrypt writes.
// A poison value is written to all other locations; this is ignored in
@ -922,7 +923,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
@ -951,11 +952,9 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
gpfifo_auth_tag_gpu.address);
}
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push, NvU32 old_cpu_put, NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(push->channel);
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
@ -968,7 +967,7 @@ static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_ce(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
// If the old_cpu_put is not equal to the last gpu put, other pushes are
@ -1051,7 +1050,7 @@ static void encrypt_push(uvm_push_t *push)
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 push_size = uvm_push_get_size(push);
uvm_push_info_t *push_info = uvm_push_info_from_push(push);
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
if (!uvm_conf_computing_mode_enabled(gpu))
@ -1098,6 +1097,7 @@ void uvm_channel_end_push(uvm_push_t *push)
NvU32 push_size;
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
bool needs_sec2_work_submit = false;
channel_pool_lock(channel->pool);
@ -1112,7 +1112,7 @@ void uvm_channel_end_push(uvm_push_t *push)
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
gpu->parent->ce_hal->semaphore_reduction_inc(push,
paired_lcic->channel_info.gpPutGpuVa,
paired_lcic->num_gpfifo_entries - 1);
@ -1126,7 +1126,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// pushes. However, direct pushes to WLC can be smaller than this
// size. This is used e.g. by indirect submission of control
// gpfifo entries.
channel_manager->gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
}
}
@ -1144,7 +1144,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// Indirect submission via SEC2/WLC needs pushes to be aligned for
// encryption/decryption. The pushbuffer_size of this push
// influences starting address of the next push.
if (uvm_conf_computing_mode_enabled(uvm_channel_get_gpu(channel)))
if (uvm_conf_computing_mode_enabled(gpu))
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
@ -1158,12 +1158,13 @@ void uvm_channel_end_push(uvm_push_t *push)
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_wlc(push);
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
// submitting via SEC2 starts a push, postpone until this push is
// ended
needs_sec2_work_submit = true;
}
}
@ -1202,12 +1203,13 @@ void uvm_channel_end_push(uvm_push_t *push)
static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry, NvU32 new_cpu_put)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 cpu_put = channel->cpu_put;
NvU64 *gpfifo_entry;
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
return;
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
@ -1234,7 +1236,7 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
UVM_ASSERT(!uvm_channel_is_sec2(channel));
UVM_ASSERT(uvm_channel_is_ce(channel));
// If the old_cpu_put is not equal to the last gpu put,
// Another push(es) is pending that needs to be submitted.
@ -1290,6 +1292,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NvU32 cpu_put;
NvU32 new_cpu_put;
bool needs_indirect_submit = false;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
channel_pool_lock(channel->pool);
@ -1312,7 +1315,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
--channel->current_gpfifo_count;
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
if (uvm_conf_computing_mode_enabled(channel->pool->manager->gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
needs_indirect_submit = true;
channel->cpu_put = new_cpu_put;
@ -1385,16 +1388,15 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
return NV_OK;
}
static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static NV_STATUS channel_reserve_and_lock(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool = channel->pool;
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
// uvm_channel_end_push() routine. Note that different than in
// channel_reserve_in_secure_pool, we cannot pick an unlocked channel from
// the secure pool, even when there is one available and *channel is locked.
// channel_reserve_and_lock_in_pool, we cannot pick an unlocked channel from
// the pool, even when there is one available and *channel is locked.
// Not a concern given that uvm_channel_reserve() is not the common-case for
// channel reservation, and only used for channel initialization, GPFIFO
// control work submission, and testing.
@ -1409,6 +1411,8 @@ static NV_STATUS uvm_channel_reserve_secure(uvm_channel_t *channel, NvU32 num_gp
uvm_spin_loop_init(&spin);
while (1) {
NV_STATUS status;
uvm_channel_update_progress(channel);
channel_pool_lock(pool);
@ -1436,9 +1440,10 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
NV_STATUS status = NV_OK;
uvm_spin_loop_t spin;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_channel_is_secure(channel))
return uvm_channel_reserve_secure(channel, num_gpfifo_entries);
if (uvm_conf_computing_mode_enabled(gpu))
return channel_reserve_and_lock(channel, num_gpfifo_entries);
if (try_claim_channel(channel, num_gpfifo_entries))
return NV_OK;
@ -1578,8 +1583,10 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
static NV_STATUS csl_init(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_secure(channel));
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
@ -1589,7 +1596,7 @@ static NV_STATUS csl_init(uvm_channel_t *channel)
else {
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(channel->pool->manager->gpu));
uvm_gpu_name(gpu));
}
return status;
@ -1609,7 +1616,10 @@ static void csl_destroy(uvm_channel_t *channel)
static void free_conf_computing_buffers(uvm_channel_t *channel)
{
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
uvm_rm_mem_free(channel->conf_computing.static_pb_unprotected_sysmem);
@ -1637,10 +1647,12 @@ static void free_conf_computing_buffers(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
{
uvm_gpu_semaphore_t *semaphore = &channel->tracking_sem.semaphore;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status;
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(semaphore->conf_computing.last_pushed_notifier),
@ -1679,7 +1691,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@ -1723,7 +1735,7 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
const size_t notifier_size = sizeof(*channel->conf_computing.static_notifier_entry_unprotected_sysmem_cpu);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
@ -1758,8 +1770,10 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
status = alloc_conf_computing_buffers_semaphore(channel);
if (status != NV_OK)
@ -1772,7 +1786,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
status = alloc_conf_computing_buffers_lcic(channel);
}
else {
uvm_gpu_t *gpu = channel->pool->manager->gpu;
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
channel->num_gpfifo_entries);
@ -1793,6 +1806,8 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(pool->num_channels > 0);
if (channel->tracking_sem.queued_value > 0) {
@ -1816,9 +1831,10 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
uvm_kvfree(channel->gpfifo_entries);
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
csl_destroy(channel);
if (uvm_channel_is_secure_ce(channel))
if (uvm_channel_is_ce(channel))
free_conf_computing_buffers(channel);
}
@ -1905,8 +1921,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_alloc_params.gpPutLoc = UVM_BUFFER_LOCATION_SYS;
}
channel_alloc_params.secure = channel->pool->secure;
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(channel_get_tsg(channel),
&channel_alloc_params,
&channel->handle,
@ -1928,8 +1942,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_info->hwChannelId,
uvm_channel_is_sec2(channel) ? "SEC2" :
uvm_channel_is_wlc(channel) ? "WLC" :
uvm_channel_is_lcic(channel) ? "LCIC" :
uvm_channel_is_secure(channel) ? "CE (secure)" : "CE",
uvm_channel_is_lcic(channel) ? "LCIC" : "CE",
channel->pool->engine_index);
return NV_OK;
@ -1981,7 +1994,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
channel->tools.pending_event_count = 0;
INIT_LIST_HEAD(&channel->tools.channel_list_node);
if (uvm_conf_computing_mode_enabled(gpu) && !uvm_channel_is_sec2(channel))
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
semaphore_pool = gpu->secure_semaphore_pool;
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
@ -2007,7 +2020,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
goto error;
}
if (uvm_channel_is_secure(channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
status = csl_init(channel);
if (status != NV_OK)
goto error;
@ -2075,7 +2088,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
NvU64 gpfifo_entry;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
if (uvm_channel_is_sec2(channel))
@ -2095,10 +2108,8 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
if (uvm_channel_is_ce(channel))
gpu->parent->ce_hal->init(&push);
else if (uvm_channel_is_sec2(channel))
gpu->parent->sec2_hal->init(&push);
else
UVM_ASSERT_MSG(0, "Unknown channel type!");
gpu->parent->sec2_hal->init(&push);
gpu->parent->host_hal->init(&push);
@ -2153,11 +2164,6 @@ static unsigned channel_pool_type_num_tsgs(uvm_channel_pool_type_t pool_type)
return 1;
}
static bool pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
return(is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}
static UVM_GPU_CHANNEL_ENGINE_TYPE pool_type_to_engine_type(uvm_channel_pool_type_t pool_type)
{
if (pool_type == UVM_CHANNEL_POOL_TYPE_SEC2)
@ -2229,7 +2235,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
unsigned num_tsgs;
uvm_channel_pool_t *pool;
UVM_ASSERT(pool_type_is_valid(pool_type));
UVM_ASSERT(uvm_pool_type_is_valid(pool_type));
pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
channel_manager->num_channel_pools++;
@ -2260,10 +2266,10 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
num_channels = channel_pool_type_num_channels(pool_type);
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
if (pool->secure) {
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
// Use different order lock for SEC2 and WLC channels.
// This allows reserving a SEC2 or WLC channel for indirect work
// submission while holding a reservation for a secure channel.
// submission while holding a reservation for a channel.
uvm_lock_order_t order = uvm_channel_pool_is_sec2(pool) ? UVM_LOCK_ORDER_CSL_SEC2_PUSH :
(uvm_channel_pool_is_wlc(pool) ? UVM_LOCK_ORDER_CSL_WLC_PUSH :
UVM_LOCK_ORDER_CSL_PUSH);
@ -2297,23 +2303,6 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
return status;
}
static NV_STATUS channel_pool_add_secure(uvm_channel_manager_t *channel_manager,
uvm_channel_pool_type_t pool_type,
unsigned engine_index,
uvm_channel_pool_t **pool_out)
{
uvm_channel_pool_t *pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
pool->secure = true;
return channel_pool_add(channel_manager, pool_type, engine_index, pool_out);
}
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type)
{
// For now, all channels are secure channels
return true;
}
static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
{
if (!cap->supported || cap->grce)
@ -2461,13 +2450,6 @@ static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
if (!ce_usable_for_channel_type(type, cap))
continue;
if (uvm_conf_computing_mode_is_hcc(manager->gpu)) {
// All usable CEs are secure
UVM_ASSERT(cap->secure);
// Multi-PCE LCEs are disallowed
UVM_ASSERT(hweight32(cap->cePceMask) == 1);
}
__set_bit(i, manager->ce_mask);
if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
@ -2523,7 +2505,7 @@ out:
return status;
}
// Return the non-secure pool corresponding to the given CE index
// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
@ -2734,24 +2716,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
{
unsigned num_channel_pools;
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// Create one CE channel pool per usable CE
num_channel_pools = num_used_ce;
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// CE proxy channel pool.
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
num_channel_pools++;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
// Create one CE secure channel pool per usable CE
if (uvm_conf_computing_mode_is_hcc(manager->gpu))
num_channel_pools += num_used_ce;
// SEC2 pool, WLC pool, LCIC pool
if (uvm_conf_computing_mode_enabled(manager->gpu))
num_channel_pools += 3;
}
return num_channel_pools;
}
@ -2783,38 +2758,6 @@ static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager,
return NV_OK;
}
static NV_STATUS channel_manager_create_ce_secure_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
unsigned ce;
if (!uvm_conf_computing_mode_is_hcc(manager->gpu))
return NV_OK;
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
NV_STATUS status;
unsigned type;
uvm_channel_pool_t *pool = NULL;
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
if (status != NV_OK)
return status;
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
unsigned preferred = preferred_ce[type];
if (preferred != ce)
continue;
if (uvm_channel_type_requires_secure_pool(manager->gpu, type)) {
UVM_ASSERT(manager->pool_to_use.default_for_type[type] == NULL);
manager->pool_to_use.default_for_type[type] = pool;
}
}
}
return NV_OK;
}
static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(wlc);
@ -3142,6 +3085,64 @@ static NV_STATUS channel_manager_setup_wlc_lcic(uvm_channel_pool_t *wlc_pool, uv
return NV_OK;
}
static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
NV_STATUS status;
unsigned wlc_lcic_ce_index;
uvm_channel_pool_t *sec2_pool = NULL;
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
if (!uvm_conf_computing_mode_enabled(manager->gpu))
return NV_OK;
status = uvm_rm_mem_alloc(manager->gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(UvmCslIv),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&manager->gpu->conf_computing.iv_rm_mem);
if (status != NV_OK)
return status;
// Create SEC2 pool. This needs to be done first, initialization of
// other channels needs SEC2.
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
// Use the same CE as CPU TO GPU channels for WLC/LCIC
// Both need to use the same engine for the fixed schedule to work.
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
// work launch
// Find a metric to select the best CE to use
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
// Create WLC/LCIC pools. This should be done early, CE channels use
// them for secure launch. The WLC pool must be created before the LCIC.
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
if (status != NV_OK)
return status;
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
if (status != NV_OK)
return status;
// The LCIC pool must be assigned after the call to
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
// are ready to be used for secure work submission.
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
return NV_OK;
}
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
{
NV_STATUS status;
@ -3162,62 +3163,11 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
if (!manager->channel_pools)
return NV_ERR_NO_MEMORY;
if (uvm_conf_computing_mode_enabled(manager->gpu)) {
uvm_channel_pool_t *sec2_pool = NULL;
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
unsigned wlc_lcic_ce_index;
status = uvm_rm_mem_alloc(manager->gpu,
UVM_RM_MEM_TYPE_SYS,
sizeof(UvmCslIv),
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&manager->gpu->conf_computing.iv_rm_mem);
status = channel_manager_create_conf_computing_pools(manager, preferred_ce);
if (status != NV_OK)
return status;
// Create SEC2 pool. This needs to be done first, initialization of
// other channels needs SEC2.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_SEC2, 0, &sec2_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_SEC2] = sec2_pool;
// Use the same CE as CPU TO GPU channels for WLC/LCIC
// Both need to use the same engine for the fixed schedule to work.
// TODO: Bug 3981928: [hcc][uvm] Optimize parameters of WLC/LCIC secure
// work launch
// Find a metric to select the best CE to use
wlc_lcic_ce_index = preferred_ce[UVM_CHANNEL_TYPE_CPU_TO_GPU];
// Create WLC/LCIC pools. This should be done early, CE channels use
// them for secure launch. The WLC pool must be created before the LCIC.
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_WLC, wlc_lcic_ce_index, &wlc_pool);
if (status != NV_OK)
return status;
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] = wlc_pool;
status = channel_pool_add_secure(manager, UVM_CHANNEL_POOL_TYPE_LCIC, wlc_lcic_ce_index, &lcic_pool);
if (status != NV_OK)
return status;
status = channel_manager_setup_wlc_lcic(wlc_pool, lcic_pool);
if (status != NV_OK)
return status;
// The LCIC pool must be assigned after the call to
// channel_manager_setup_wlc_lcic(). It determines WLC and LCIC channels
// are ready to be used for secure work submission.
manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] = lcic_pool;
status = channel_manager_create_ce_secure_pools(manager, preferred_ce);
}
else {
status = channel_manager_create_ce_pools(manager, preferred_ce);
}
if (status != NV_OK)
return status;

View File

@ -104,16 +104,14 @@ typedef enum
// ----------------------------------
// Channel type with fixed schedules
// Work Launch Channel (WLC) is a specialized channel
// for launching work on other channels when
// Confidential Computing is enabled.
// It is paired with LCIC (below)
// Work Launch Channel (WLC) is a specialized channel for launching work on
// other channels when the Confidential Computing feature is enabled. It is
// paired with LCIC (below).
UVM_CHANNEL_TYPE_WLC,
// Launch Confirmation Indicator Channel (LCIC) is a
// specialized channel with fixed schedule. It gets
// triggered by executing WLC work, and makes sure that
// WLC get/put pointers are up-to-date.
// Launch Confirmation Indicator Channel (LCIC) is a specialized channel
// with fixed schedule. It gets triggered by executing WLC work, and makes
// sure that WLC get/put pointers are up-to-date.
UVM_CHANNEL_TYPE_LCIC,
UVM_CHANNEL_TYPE_COUNT,
@ -242,11 +240,9 @@ typedef struct
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
// Counting semaphore for available and unlocked channels, it must be
// acquired before submitting work to a secure channel.
// acquired before submitting work to a channel when the Confidential
// Computing feature is enabled.
uvm_semaphore_t push_sem;
// See uvm_channel_is_secure() documentation.
bool secure;
} uvm_channel_pool_t;
struct uvm_channel_struct
@ -304,8 +300,9 @@ struct uvm_channel_struct
// its internal operation and each push may modify this state.
uvm_mutex_t push_lock;
// Every secure channel has cryptographic state in HW, which is
// mirrored here for CPU-side operations.
// When the Confidential Computing feature is enabled, every channel has
// cryptographic state in HW, which is mirrored here for CPU-side
// operations.
UvmCslContext ctx;
bool is_ctx_initialized;
@ -459,46 +456,28 @@ struct uvm_channel_manager_struct
// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool);
// A channel is secure if it has HW encryption capabilities.
//
// Secure channels are treated differently in the UVM driver. Each secure
// channel has a unique CSL context associated with it, has relatively
// restrictive reservation policies (in comparison with non-secure channels),
// it is requested to be allocated differently by RM, etc.
static bool uvm_channel_pool_is_secure(uvm_channel_pool_t *pool)
static bool uvm_pool_type_is_valid(uvm_channel_pool_type_t pool_type)
{
return pool->secure;
}
static bool uvm_channel_is_secure(uvm_channel_t *channel)
{
return uvm_channel_pool_is_secure(channel->pool);
return (is_power_of_2(pool_type) && (pool_type < UVM_CHANNEL_POOL_TYPE_MASK));
}
static bool uvm_channel_pool_is_sec2(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_SEC2);
}
static bool uvm_channel_pool_is_secure_ce(uvm_channel_pool_t *pool)
{
return uvm_channel_pool_is_secure(pool) && uvm_channel_pool_is_ce(pool);
}
static bool uvm_channel_pool_is_wlc(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_WLC);
}
static bool uvm_channel_pool_is_lcic(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_LCIC);
}
@ -508,11 +487,6 @@ static bool uvm_channel_is_sec2(uvm_channel_t *channel)
return uvm_channel_pool_is_sec2(channel->pool);
}
static bool uvm_channel_is_secure_ce(uvm_channel_t *channel)
{
return uvm_channel_pool_is_secure_ce(channel->pool);
}
static bool uvm_channel_is_wlc(uvm_channel_t *channel)
{
return uvm_channel_pool_is_wlc(channel->pool);
@ -523,12 +497,9 @@ static bool uvm_channel_is_lcic(uvm_channel_t *channel)
return uvm_channel_pool_is_lcic(channel->pool);
}
bool uvm_channel_type_requires_secure_pool(uvm_gpu_t *gpu, uvm_channel_type_t channel_type);
NV_STATUS uvm_channel_secure_init(uvm_gpu_t *gpu, uvm_channel_t *channel);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}
@ -540,11 +511,7 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
{
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
if (uvm_channel_pool_is_wlc(pool) || uvm_channel_pool_is_lcic(pool))
return true;
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
return !uvm_channel_pool_is_sec2(pool);
}
static bool uvm_channel_is_ce(uvm_channel_t *channel)
@ -686,6 +653,11 @@ static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
return channel->pool->manager->gpu;
}
static uvm_pushbuffer_t *uvm_channel_get_pushbuffer(uvm_channel_t *channel)
{
return channel->pool->manager->pushbuffer;
}
// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{

View File

@ -681,9 +681,10 @@ done:
}
// The following test is inspired by uvm_push_test.c:test_concurrent_pushes.
// This test verifies that concurrent pushes using the same secure channel pool
// select different channels.
NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
// This test verifies that concurrent pushes using the same channel pool
// select different channels, when the Confidential Computing feature is
// enabled.
NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_channel_pool_t *pool;
@ -703,9 +704,6 @@ NV_STATUS test_secure_channel_selection(uvm_va_space_t *va_space)
uvm_channel_type_t channel_type;
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
if (!uvm_channel_type_requires_secure_pool(gpu, channel_type))
continue;
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
TEST_CHECK_RET(pool != NULL);
@ -997,7 +995,7 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK)
goto done;
status = test_secure_channel_selection(va_space);
status = test_conf_computing_channel_selection(va_space);
if (status != NV_OK)
goto done;
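// Illustrative sketch, not part of this change: the property that
// test_conf_computing_channel_selection() above exercises, written against
// helpers visible elsewhere in this diff (uvm_push_begin, uvm_push_end,
// TEST_NV_CHECK_RET, TEST_CHECK_RET). With the Confidential Computing feature
// enabled, a channel stays reserved (and its CSL lock held) from
// uvm_push_begin() until uvm_push_end(), so two pushes begun back to back on
// the same pool must land on different channels.
static NV_STATUS example_two_pushes_use_distinct_channels(uvm_gpu_t *gpu)
{
    uvm_push_t push_a, push_b;

    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push_a, "push a"));
    TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push_b, "push b"));

    // Both pushes are still open, so they cannot share a channel
    TEST_CHECK_RET(push_a.channel != push_b.channel);

    uvm_push_end(&push_a);
    uvm_push_end(&push_b);

    return NV_OK;
}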

View File

@ -579,8 +579,10 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_channel_is_secure_ce(channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(uvm_channel_is_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);

View File

@ -91,9 +91,9 @@ struct uvm_gpu_tracking_semaphore_struct
// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
// When the Confidential Computing feature is enabled, pools associated with
// secure CE channels are allocated in the CPR of vidmem and as such have
// all the associated access restrictions. Because of this, they're called
// When the Confidential Computing feature is enabled, semaphore pools
// associated with CE channels are allocated in the CPR (Compute Protected
// Region) of vidmem and as such have all the associated access restrictions.
// Because of this, they're called secure pools, and secure semaphores are
// allocated out of said secure pools.
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
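// Illustrative sketch, not part of this change: a caller wanting semaphores
// that CE channels can reach might choose between the two pool constructors
// based on whether the Confidential Computing feature is enabled
// (uvm_conf_computing_mode_enabled() is used this way elsewhere in this
// changeset). The helper name is hypothetical.
static NV_STATUS example_create_pool(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out)
{
    // Secure pools live in the CPR of vidmem and carry the access
    // restrictions described above; regular pools do not.
    if (uvm_conf_computing_mode_enabled(gpu))
        return uvm_gpu_semaphore_secure_pool_create(gpu, pool_out);

    return uvm_gpu_semaphore_pool_create(gpu, pool_out);
}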

View File

@ -61,7 +61,11 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
// GH180.
parent_gpu->ce_phys_vidmem_write_supported = !uvm_gpu_is_coherent(parent_gpu);
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug:
// a portion of the uvm tests became nonresponsive after some time and then
// failed even after reboot
parent_gpu->peer_copy_mode = uvm_gpu_is_coherent(parent_gpu) ?
UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
// accessing GR context buffers support the 57-bit VA range.

View File

@ -491,7 +491,6 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
UVM_ASSERT(uvm_push_is_fake(push) || uvm_channel_is_secure(push->channel));
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
if (!src.is_virtual)
@ -540,7 +539,6 @@ void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT(uvm_conf_computing_mode_is_hcc(gpu));
UVM_ASSERT(!push->channel || uvm_channel_is_secure(push->channel));
UVM_ASSERT(IS_ALIGNED(auth_tag.address, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
// The addressing mode (and aperture, if applicable) of the source and

View File

@ -279,13 +279,14 @@
// Operations not allowed while holding the lock:
// - GPU memory allocation which can evict memory (would require nesting
// block locks)
//
// - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
// Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
// Condition: The Confidential Computing feature is enabled
// Exclusive lock (mutex)
//
// Protects:
// - Protect the state of the uvm_conf_computing_dma_buffer_pool_t
// when the Confidential Computing feature is enabled on the system.
//
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
// gpu->sysmem_mappings.bitlock)
@ -321,22 +322,25 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//
// - Secure channel CSL channel pool semaphore
// - CE channel CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_PUSH
// Semaphore per SEC2 channel pool
// Condition: The Confidential Computing feature is enabled
// Semaphore per CE channel pool
//
// The semaphore controls concurrent pushes to secure channels. Secure work
// submission depends on channel availability in GPFIFO entries (as in any
// other channel type) but also on channel locking. Each secure channel has a
// lock to enforce ordering of pushes. The channel's CSL lock is taken on
// channel reservation until uvm_push_end. Secure channels are stateful
// channels and the CSL lock protects their CSL state/context.
// The semaphore controls concurrent pushes to CE channels that are not WLC
// channels. Secure work submission depends on channel availability in
// GPFIFO entries (as in any other channel type) but also on channel
// locking. Each channel has a lock to enforce ordering of pushes. The
// channel's CSL lock is taken on channel reservation until uvm_push_end.
// When the Confidential Computing feature is enabled, channels are
// stateful, and the CSL lock protects their CSL state/context.
//
// Operations allowed while holding this lock
// - Pushing work to CE secure channels
// - Pushing work to CE channels (except for WLC channels)
//
// - WLC CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
// Condition: The Confidential Computing feature is enabled
// Semaphore per WLC channel pool
//
// The semaphore controls concurrent pushes to WLC channels. WLC work
@ -346,8 +350,8 @@
// channel reservation until uvm_push_end. SEC2 channels are stateful
// channels and the CSL lock protects their CSL state/context.
//
// This lock ORDER is different and sits below generic secure channel CSL
// lock and above SEC2 CSL lock. This reflects the dual nature of WLC
// This lock ORDER is different and sits below the generic channel CSL
// lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
// channels; they use SEC2 indirect work launch during initialization,
// and after their schedule is initialized they provide indirect launch
// functionality to other CE channels.
@ -357,6 +361,7 @@
//
// - SEC2 CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_SEC2_CSL_PUSH
// Condition: The Confidential Computing feature is enabled
// Semaphore per SEC2 channel pool
//
// The semaphore controls concurrent pushes to SEC2 channels. SEC2 work
@ -366,9 +371,9 @@
// channel reservation until uvm_push_end. SEC2 channels are stateful
// channels and the CSL lock protects their CSL state/context.
//
// This lock ORDER is different and lower than the generic secure channel
// lock to allow secure work submission to use a SEC2 channel to submit
// work before releasing the CSL lock of the originating secure channel.
// This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
// to allow secure work submission to use a SEC2 channel to submit
// work before releasing the CSL lock of the originating channel.
//
// Operations allowed while holding this lock
// - Pushing work to SEC2 channels
@ -408,16 +413,18 @@
//
// - WLC Channel lock
// Order: UVM_LOCK_ORDER_WLC_CHANNEL
// Condition: The Confidential Computing feature is enabled
// Spinlock (uvm_spinlock_t)
//
// Lock protecting the state of WLC channels in a channel pool. This lock
// is separate from the above generic channel lock to allow for indirect
// worklaunch pushes while holding the main channel lock.
// (WLC pushes don't need any of the pushbuffer locks described above)
// is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
// to allow for indirect worklaunch pushes while holding the main channel
// lock (WLC pushes don't need any of the pushbuffer locks described
// above)
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
// Reader/writer lock (rw_sempahore)
// Reader/writer lock (rw_semaphore)
//
// This lock protects the list of VA spaces used when broadcasting
// UVM profiling events.
@ -437,9 +444,10 @@
//
// - Tracking semaphores
// Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
// When the Confidential Computing feature is enabled, CE semaphores are
// encrypted, and require to take the CSL lock (UVM_LOCK_ORDER_LEAF) to
// decrypt the payload.
// Condition: The Confidential Computing feature is enabled
//
// CE semaphore payloads are encrypted, and require taking the CSL lock
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - Leaf locks
// Order: UVM_LOCK_ORDER_LEAF
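// Illustrative sketch, not part of this change: one way to read the ordering
// rules above is that a lock may only be acquired while every lock already
// held sits earlier (closer to the outermost end) in this list. The helper
// below is a hypothetical, standalone illustration of that invariant, not the
// real UVM lock tracker.
typedef struct
{
    NvU32 depths_held[16]; // Positions in the documented order, outermost == 0
    NvU32 count;
} example_held_locks_t;

static bool example_may_acquire(const example_held_locks_t *held, NvU32 depth)
{
    NvU32 i;

    for (i = 0; i < held->count; i++) {
        if (held->depths_held[i] >= depth)
            return false;
    }

    return true;
}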

View File

@ -392,12 +392,6 @@ static NV_STATUS uvm_mem_alloc_vidmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **me
return uvm_mem_alloc(&params, mem_out);
}
// Helper for allocating protected vidmem with the default page size
static NV_STATUS uvm_mem_alloc_vidmem_protected(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **mem_out)
{
return uvm_mem_alloc_vidmem(size, gpu, mem_out);
}
// Helper for allocating sysmem and mapping it on the CPU
static NV_STATUS uvm_mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, struct mm_struct *mm, uvm_mem_t **mem_out)
{

View File

@ -134,6 +134,22 @@ static NV_STATUS block_migrate_map_unmapped_pages(uvm_va_block_t *va_block,
// first map operation
uvm_page_mask_complement(&va_block_context->caller_page_mask, &va_block->maybe_mapped_pages);
if (uvm_va_block_is_hmm(va_block) && !UVM_ID_IS_CPU(dest_id)) {
// Do not map pages that are already resident on the CPU. This is in
// order to avoid breaking system-wide atomic operations on HMM. HMM's
// implementation of system-wide atomic operations involves restricting
// mappings to one processor (CPU or a GPU) at a time. If we were to
// grant a GPU a mapping to system memory, this gets into trouble
// because, on the CPU side, Linux can silently upgrade PTE permissions
// (move from read-only, to read-write, without any MMU notifiers
// firing), thus breaking the model by allowing simultaneous read-write
// access from two separate processors. To avoid that, just don't map
// such pages at all, when migrating.
uvm_page_mask_andnot(&va_block_context->caller_page_mask,
&va_block_context->caller_page_mask,
uvm_va_block_resident_mask_get(va_block, UVM_ID_CPU));
}
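// Illustrative note, not part of this change: uvm_page_mask_andnot() above
// removes every CPU-resident page from the set the caller is about to map.
// For example, if caller_page_mask covered pages 0..63 of the block and pages
// 0..15 are resident on the CPU, only pages 16..63 remain candidates for the
// GPU mapping.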
// Only map those pages that are not mapped anywhere else (likely due
// to a first touch or a migration). We pass
// UvmEventMapRemoteCauseInvalid since the destination processor of a

View File

@ -391,11 +391,13 @@ uvm_gpu_address_t uvm_push_inline_data_end(uvm_push_inline_data_t *data)
inline_data_address = (NvU64) (uintptr_t)(push->next + 1);
}
else {
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
// Offset of the inlined data within the push.
inline_data_address = (push->next - push->begin + 1) * UVM_METHOD_SIZE;
// Add GPU VA of the push begin
inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(channel->pool->manager->pushbuffer, push);
inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
}
// This will place a noop right before the inline data that was written.
@ -438,10 +440,8 @@ NvU64 *uvm_push_timestamp(uvm_push_t *push)
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_timestamp(push, address.address);
else if (uvm_channel_is_sec2(push->channel))
gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
else
UVM_ASSERT_MSG(0, "Semaphore release timestamp on an unsupported channel.\n");
gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
return timestamp;
}

View File

@ -377,11 +377,6 @@ static bool uvm_push_has_space(uvm_push_t *push, NvU32 free_space)
NV_STATUS uvm_push_begin_fake(uvm_gpu_t *gpu, uvm_push_t *push);
void uvm_push_end_fake(uvm_push_t *push);
static bool uvm_push_is_fake(uvm_push_t *push)
{
return !push->channel;
}
// Begin an inline data fragment in the push
//
// The inline data will be ignored by the GPU, but can be referenced from

View File

@ -40,10 +40,9 @@
static NvU32 get_push_begin_size(uvm_channel_t *channel)
{
if (uvm_channel_is_sec2(channel)) {
// SEC2 channels allocate CSL signature buffer at the beginning.
if (uvm_channel_is_sec2(channel))
return UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE + UVM_METHOD_SIZE;
}
return 0;
}
@ -51,10 +50,14 @@ static NvU32 get_push_begin_size(uvm_channel_t *channel)
// This is the storage required by a semaphore release.
static NvU32 get_push_end_min_size(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu)) {
if (uvm_channel_is_ce(channel)) {
if (uvm_channel_is_wlc(channel)) {
// Space (in bytes) used by uvm_push_end() on a Secure CE channel.
// Note that Secure CE semaphore release pushes two memset and one
// Space (in bytes) used by uvm_push_end() on a CE channel when
// the Confidential Computing feature is enabled.
//
// Note that CE semaphore release pushes two memset and one
// encryption method on top of the regular release.
// Memset size
// -------------
@ -75,43 +78,44 @@ static NvU32 get_push_end_min_size(uvm_channel_t *channel)
//
// TOTAL : 144 Bytes
if (uvm_channel_is_wlc(channel)) {
// Same as CE + LCIC GPPut update + LCIC doorbell
return 24 + 144 + 24 + 24;
}
else if (uvm_channel_is_secure_ce(channel)) {
return 24 + 144;
}
// Space (in bytes) used by uvm_push_end() on a CE channel.
return 24;
}
else if (uvm_channel_is_sec2(channel)) {
UVM_ASSERT(uvm_channel_is_sec2(channel));
// A perfectly aligned inline buffer in SEC2 semaphore release.
// We add UVM_METHOD_SIZE because of the NOP method to reserve
// UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES (the inline buffer.)
return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_METHOD_SIZE;
}
return 0;
UVM_ASSERT(uvm_channel_is_ce(channel));
// Space (in bytes) used by uvm_push_end() on a CE channel.
return 24;
}
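// Illustrative note, not part of this change: putting the byte accounting
// above together, a plain CE semaphore release costs 24 bytes; with the
// Confidential Computing feature enabled, the CE release adds the two memsets
// and the encrypt for 24 + 144 = 168 bytes; and a WLC release further adds
// the LCIC GPPut update and LCIC doorbell for 24 + 144 + 24 + 24 = 216 bytes.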
static NvU32 get_push_end_max_size(uvm_channel_t *channel)
{
if (uvm_channel_is_ce(channel)) {
if (uvm_channel_is_wlc(channel)) {
// WLC pushes are always padded to UVM_MAX_WLC_PUSH_SIZE
if (uvm_channel_is_wlc(channel))
return UVM_MAX_WLC_PUSH_SIZE;
}
// Space (in bytes) used by uvm_push_end() on a CE channel.
return get_push_end_min_size(channel);
}
else if (uvm_channel_is_sec2(channel)) {
// Space (in bytes) used by uvm_push_end() on a SEC2 channel.
// Note that SEC2 semaphore release uses an inline buffer with alignment
// requirements. This is the "worst" case semaphore_release storage.
if (uvm_channel_is_sec2(channel))
return 48 + UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES + UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT;
}
return 0;
UVM_ASSERT(uvm_channel_is_ce(channel));
// Space (in bytes) used by uvm_push_end() on a CE channel.
return get_push_end_min_size(channel);
}
static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
@ -294,10 +298,19 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
NvU32 i;
uvm_push_t *pushes;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
uvm_tracker_t tracker;
// When the Confidential Computing feature is enabled, a channel reserved at
// the start of a push cannot be reserved again until that push ends. The
// test is waived, because the number of pushes it starts per pool exceeds
// the number of channels in the pool, so it would block indefinitely.
gpu = uvm_va_space_find_first_gpu(va_space);
if ((gpu != NULL) && uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
uvm_tracker_init(&tracker);
// As noted above, this test does unsafe things that would be detected by
// lock tracking, opt-out.
@ -310,16 +323,11 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
}
for_each_va_space_gpu(gpu, va_space) {
NvU32 i;
// A secure channels reserved at the start of a push cannot be reserved
// again until that push ends. The test would block indefinitely
// if secure pools are not skipped, because the number of pushes started
// per pool exceeds the number of channels in the pool.
if (uvm_channel_type_requires_secure_pool(gpu, channel_type))
goto done;
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
uvm_push_t *push = &pushes[i];
status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, push, "concurrent push %u", i);
TEST_CHECK_GOTO(status == NV_OK, done);
}
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {

View File

@ -458,7 +458,7 @@ static void decrypt_push(uvm_channel_t *channel, uvm_gpfifo_entry_t *gpfifo)
void *push_unprotected_cpu_va;
NvU32 pushbuffer_offset = gpfifo->pushbuffer_offset;
NvU32 push_info_index = gpfifo->push_info - channel->push_infos;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
uvm_push_crypto_bundle_t *crypto_bundle = channel->conf_computing.push_crypto_bundles + push_info_index;
if (channel->conf_computing.push_crypto_bundles == NULL)
@ -499,7 +499,7 @@ void uvm_pushbuffer_mark_completed(uvm_channel_t *channel, uvm_gpfifo_entry_t *g
uvm_pushbuffer_chunk_t *chunk;
bool need_to_update_chunk = false;
uvm_push_info_t *push_info = gpfifo->push_info;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

View File

@ -270,7 +270,7 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
*mem = NULL;
if (type == MEM_ALLOC_TYPE_VIDMEM_PROTECTED) {
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem_protected(size, gpu, mem));
TEST_NV_CHECK_RET(uvm_mem_alloc_vidmem(size, gpu, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
}
@ -348,9 +348,9 @@ static NV_STATUS cpu_decrypt(uvm_channel_t *channel,
return NV_OK;
}
// gpu_encrypt uses a secure CE for encryption (instead of SEC2). SEC2 does not
// support encryption. The following function is copied from uvm_ce_test.c and
// adapted to SEC2 tests.
// gpu_encrypt uses the Copy Engine for encryption, instead of SEC2. SEC2 does
// not support encryption. The following function is copied from uvm_ce_test.c
// and adapted to SEC2 tests.
static void gpu_encrypt(uvm_push_t *push,
uvm_mem_t *dst_mem,
uvm_mem_t *src_mem,

View File

@ -229,6 +229,24 @@ static void unmap_user_pages(struct page **pages, void *addr, NvU64 size)
uvm_kvfree(pages);
}
// This must be called with the mmap_lock held in read mode or better.
static NV_STATUS check_vmas(struct mm_struct *mm, NvU64 start_va, NvU64 size)
{
struct vm_area_struct *vma;
NvU64 addr = start_va;
NvU64 region_end = start_va + size;
do {
vma = find_vma(mm, addr);
if (!vma || !(addr >= vma->vm_start) || uvm_file_is_nvidia_uvm(vma->vm_file))
return NV_ERR_INVALID_ARGUMENT;
addr = vma->vm_end;
} while (addr < region_end);
return NV_OK;
}
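// Illustrative sketch, not part of this change: newer kernels dropped the
// vmas array parameter from pin_user_pages(), which fits the VMA checks now
// being done separately in check_vmas() above. A minimal standalone version
// of the same two-step pattern; the helper name is hypothetical and it
// assumes a kernel providing the four-argument pin_user_pages().
#include <linux/mm.h>
#include <linux/sched.h>

static long example_pin_user_range(unsigned long user_va, unsigned long nr_pages, struct page **pages)
{
    long pinned;

    mmap_read_lock(current->mm);

    // A real caller would validate the VMAs covering the range here (as
    // check_vmas() does) before pinning, since pin_user_pages() no longer
    // returns per-page VMA information.
    pinned = pin_user_pages(user_va, nr_pages, FOLL_WRITE, pages);

    mmap_read_unlock(current->mm);

    return pinned;
}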
// Map virtual memory of data from [user_va, user_va + size) of current process into kernel.
// Sets *addr to kernel mapping and *pages to the array of struct pages that contain the memory.
static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct page ***pages)
@ -237,7 +255,6 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
long ret = 0;
long num_pages;
long i;
struct vm_area_struct **vmas = NULL;
*addr = NULL;
*pages = NULL;
@ -254,22 +271,30 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
goto fail;
}
vmas = uvm_kvmalloc(sizeof(struct vm_area_struct *) * num_pages);
if (vmas == NULL) {
status = NV_ERR_NO_MEMORY;
// Although uvm_down_read_mmap_lock() is preferable due to its participation
// in the UVM lock dependency tracker, it cannot be used here. That's
// because pin_user_pages() may fault in HMM pages which are GPU-resident.
// When that happens, the UVM page fault handler would record another
// mmap_read_lock() on the same thread as this one, leading to a false
// positive lock dependency report.
//
// Therefore, use the lower level nv_mmap_read_lock() here.
nv_mmap_read_lock(current->mm);
status = check_vmas(current->mm, user_va, size);
if (status != NV_OK) {
nv_mmap_read_unlock(current->mm);
goto fail;
}
nv_mmap_read_lock(current->mm);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, NULL);
nv_mmap_read_unlock(current->mm);
if (ret != num_pages) {
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
}
for (i = 0; i < num_pages; i++) {
if (page_count((*pages)[i]) > MAX_PAGE_COUNT || uvm_file_is_nvidia_uvm(vmas[i]->vm_file)) {
if (page_count((*pages)[i]) > MAX_PAGE_COUNT) {
status = NV_ERR_INVALID_ARGUMENT;
goto fail;
}
@ -279,15 +304,12 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
if (*addr == NULL)
goto fail;
uvm_kvfree(vmas);
return NV_OK;
fail:
if (*pages == NULL)
return status;
uvm_kvfree(vmas);
if (ret > 0)
uvm_put_user_pages_dirty(*pages, ret);
else if (ret < 0)

View File

@ -3055,7 +3055,7 @@ static NV_STATUS conf_computing_copy_pages_finish(uvm_va_block_t *block,
void *auth_tag_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
void *staging_buffer_base = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
UVM_ASSERT(uvm_channel_is_secure(push->channel));
UVM_ASSERT(uvm_conf_computing_mode_enabled(push->gpu));
if (UVM_ID_IS_GPU(copy_state->dst.id))
return NV_OK;
@ -3106,7 +3106,7 @@ static void block_copy_push(uvm_va_block_t *block,
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_secure(push->channel)) {
if (uvm_conf_computing_mode_enabled(gpu)) {
if (UVM_ID_IS_CPU(copy_state->src.id))
conf_computing_block_copy_push_cpu_to_gpu(block, copy_state, region, push);
else
@ -3134,19 +3134,18 @@ static NV_STATUS block_copy_end_push(uvm_va_block_t *block,
// at that point.
uvm_push_end(push);
if ((push_status == NV_OK) && uvm_channel_is_secure(push->channel))
if ((push_status == NV_OK) && uvm_conf_computing_mode_enabled(push->gpu))
push_status = conf_computing_copy_pages_finish(block, copy_state, push);
tracker_status = uvm_tracker_add_push_safe(copy_tracker, push);
if (push_status == NV_OK)
push_status = tracker_status;
if (uvm_channel_is_secure(push->channel)) {
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_conf_computing_mode_enabled(push->gpu)) {
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_tracker_overwrite_with_push(&local_tracker, push);
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool,
uvm_conf_computing_dma_buffer_free(&push->gpu->conf_computing.dma_buffer_pool,
copy_state->dma_buffer,
&local_tracker);
copy_state->dma_buffer = NULL;
@ -9612,16 +9611,10 @@ static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
if (uvm_processor_mask_empty(&revoke_processors))
new_prot = UVM_PROT_READ_WRITE;
}
if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC) {
// HMM allocations with logical read/write/atomic permission can be
// upgraded without notifying the driver so assume read/write/atomic
// even if the fault is only for reading.
if (new_prot == UVM_PROT_READ_WRITE ||
(UVM_ID_IS_CPU(fault_processor_id) && uvm_va_block_is_hmm(va_block))) {
if (logical_prot == UVM_PROT_READ_WRITE_ATOMIC && new_prot == UVM_PROT_READ_WRITE) {
if (uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(new_residency)], fault_processor_id))
new_prot = UVM_PROT_READ_WRITE_ATOMIC;
}
}
return new_prot;
}
@ -9857,8 +9850,6 @@ out:
return status == NV_OK ? tracker_status : status;
}
// TODO: Bug 1750144: check logical permissions from HMM to know what's the
// maximum allowed.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index)
@ -9935,14 +9926,18 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
// Exclude the processor for which the mapping protections are being computed
uvm_processor_mask_clear(&write_mappings, processor_id);
// At this point, any processor with atomic mappings either has native atomics support to the
// processor with the resident copy or has disabled system-wide atomics. If the requesting
// processor has disabled system-wide atomics or has native atomics to that processor, we can
// map with ATOMIC privileges. Likewise, if there are no other processors with WRITE or ATOMIC
// mappings, we can map with ATOMIC privileges.
// At this point, any processor with atomic mappings either has native
// atomics support to the processor with the resident copy or has
// disabled system-wide atomics. If the requesting processor has
// disabled system-wide atomics or has native atomics to that processor,
// we can map with ATOMIC privileges. Likewise, if there are no other
// processors with WRITE or ATOMIC mappings, we can map with ATOMIC
// privileges. For HMM, don't allow GPU atomic access to remote mapped
// system memory even if there are no write mappings since CPU access
// can be upgraded without notification.
if (!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, processor_id) ||
uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], processor_id) ||
uvm_processor_mask_empty(&write_mappings)) {
(uvm_processor_mask_empty(&write_mappings) && !uvm_va_block_is_hmm(va_block))) {
return UVM_PROT_READ_WRITE_ATOMIC;
}

View File

@ -86,6 +86,14 @@
#include <linux/ioport.h>
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
#include <linux/cc_platform.h>
#endif
#if defined(NV_ASM_CPUFEATURE_H_PRESENT)
#include <asm/cpufeature.h>
#endif
#include "conftest/patches.h"
#define RM_THRESHOLD_TOTAL_IRQ_COUNT 100000
@ -139,8 +147,6 @@ struct semaphore nv_linux_devices_lock;
static NvTristate nv_chipset_is_io_coherent = NV_TRISTATE_INDETERMINATE;
NvU64 nv_shared_gpa_boundary = 0;
// True if all the successfully probed devices support ATS
// Assigned at device probe (module init) time
NvBool nv_ats_supported = NVCPU_IS_PPC64LE
@ -234,77 +240,23 @@ struct dev_pm_ops nv_pm_ops = {
*** STATIC functions
***/
#if defined(NVCPU_X86_64)
#define NV_AMD_SEV_BIT BIT(1)
#define NV_GENMASK_ULL(h, l) \
(((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
static
void get_shared_gpa_boundary(
void nv_detect_conf_compute_platform(
void
)
{
NvU32 priv_high = cpuid_ebx(0x40000003);
if (priv_high & BIT(22))
#if defined(NV_CC_PLATFORM_PRESENT)
os_cc_enabled = cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
#if defined(X86_FEATURE_TDX_GUEST)
if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
{
NvU32 isolation_config_b = cpuid_ebx(0x4000000C);
nv_shared_gpa_boundary = ((NvU64)1) << ((isolation_config_b & NV_GENMASK_ULL(11, 6)) >> 6);
}
}
static
NvBool nv_is_sev_supported(
void
)
{
unsigned int eax, ebx, ecx, edx;
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
return NV_FALSE;
/* By design, a VM using vTOM doesn't see the SEV setting */
get_shared_gpa_boundary();
if (nv_shared_gpa_boundary != 0)
return NV_TRUE;
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & NV_AMD_SEV_BIT))
return NV_FALSE;
return NV_TRUE;
os_cc_tdx_enabled = NV_TRUE;
}
#endif
static
void nv_sev_init(
void
)
{
#if defined(MSR_AMD64_SEV) && defined(NVCPU_X86_64)
NvU32 lo_val, hi_val;
if (!nv_is_sev_supported())
return;
rdmsr(MSR_AMD64_SEV, lo_val, hi_val);
os_sev_status = lo_val;
#if defined(MSR_AMD64_SEV_ENABLED)
os_sev_enabled = (os_sev_status & MSR_AMD64_SEV_ENABLED);
#endif
/* By design, a VM using vTOM doesn't see the SEV setting */
if (nv_shared_gpa_boundary != 0)
os_sev_enabled = NV_TRUE;
#else
os_cc_enabled = NV_FALSE;
os_cc_tdx_enabled = NV_FALSE;
#endif
}
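// Illustrative note, not part of this change: cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)
// reports guest memory encryption for both the AMD SEV family and Intel TDX
// guests, which is why the separate cpu_feature_enabled(X86_FEATURE_TDX_GUEST)
// check above is needed to tell TDX apart and set os_cc_tdx_enabled.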
@ -710,7 +662,7 @@ nv_module_init(nv_stack_t **sp)
}
nv_init_rsync_info();
nv_sev_init();
nv_detect_conf_compute_platform();
if (!rm_init_rm(*sp))
{
@ -4570,19 +4522,19 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
* as the starting address for all DMA mappings.
*/
saved_dma_mask = pci_dev->dma_mask;
if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64)) != 0)
if (dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)) != 0)
{
goto done;
}
dma_addr = pci_map_single(pci_dev, NULL, 1, DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(pci_dev, dma_addr))
dma_addr = dma_map_single(&pci_dev->dev, NULL, 1, DMA_BIDIRECTIONAL);
if (dma_mapping_error(&pci_dev->dev, dma_addr))
{
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
pci_unmap_single(pci_dev, dma_addr, 1, DMA_BIDIRECTIONAL);
dma_unmap_single(&pci_dev->dev, dma_addr, 1, DMA_BIDIRECTIONAL);
/*
* From IBM: "For IODA2, native DMA bypass or KVM TCE-based implementation
@ -4614,7 +4566,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: DMA window limited by platform\n");
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
else if ((dma_addr & saved_dma_mask) != 0)
@ -4633,7 +4585,7 @@ NvU64 NV_API_CALL nv_get_dma_start_address(
*/
nv_printf(NV_DBG_WARNINGS,
"NVRM: DMA window limited by memory size\n");
pci_set_dma_mask(pci_dev, saved_dma_mask);
dma_set_mask(&pci_dev->dev, saved_dma_mask);
goto done;
}
}
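// Illustrative sketch, not part of this change: pci_set_dma_mask() and
// pci_map_single() are legacy wrappers that newer kernels have removed, which
// is why the probe above now calls the generic DMA API on &pci_dev->dev
// directly. A minimal standalone version of the same probe pattern, using a
// small real scratch buffer instead of the driver's dummy mapping (the helper
// name is hypothetical):
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>

static dma_addr_t example_probe_dma_start(struct pci_dev *pdev)
{
    void *buf;
    dma_addr_t addr;

    if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) != 0)
        return 0;

    buf = kmalloc(64, GFP_KERNEL);
    if (buf == NULL)
        return 0;

    // Map a small buffer just to observe where bus addresses start on this platform
    addr = dma_map_single(&pdev->dev, buf, 64, DMA_BIDIRECTIONAL);
    if (dma_mapping_error(&pdev->dev, addr)) {
        kfree(buf);
        return 0;
    }

    dma_unmap_single(&pdev->dev, addr, 64, DMA_BIDIRECTIONAL);
    kfree(buf);

    return addr;
}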

View File

@ -209,7 +209,7 @@ NV_STATUS nvUvmInterfaceSessionCreate(uvmGpuSessionHandle *session,
memset(platformInfo, 0, sizeof(*platformInfo));
platformInfo->atsSupported = nv_ats_supported;
platformInfo->sevEnabled = os_sev_enabled;
platformInfo->sevEnabled = os_cc_enabled;
status = rm_gpu_ops_create_session(sp, (gpuSessionHandle *)session);

View File

@ -120,6 +120,9 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_array_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_driver_hardened_wc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioremap_cache_shared
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_get_domain_bus_and_slot
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_num_physpages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
@ -156,8 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += full_name_hash
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_enable_atomic_ops_to_root
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vga_tryget
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pgprot_decrypted
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_mkdec
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_platform_has
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += unsafe_follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get

View File

@ -41,8 +41,8 @@ extern nv_kthread_q_t nv_kthread_q;
NvU32 os_page_size = PAGE_SIZE;
NvU64 os_page_mask = NV_PAGE_MASK;
NvU8 os_page_shift = PAGE_SHIFT;
NvU32 os_sev_status = 0;
NvBool os_sev_enabled = 0;
NvBool os_cc_enabled = 0;
NvBool os_cc_tdx_enabled = 0;
#if defined(CONFIG_DMA_SHARED_BUFFER)
NvBool os_dma_buf_enabled = NV_TRUE;

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r536_62
#define NV_BUILD_BRANCH r535_00
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r536_62
#define NV_PUBLIC_BRANCH r535_00
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r536_62-219"
#define NV_BUILD_CHANGELIST_NUM (33114094)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r535_00-239"
#define NV_BUILD_CHANGELIST_NUM (33134228)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r536_62-219"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33114094)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r535_00-239"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33134228)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r536_62-3"
#define NV_BUILD_CHANGELIST_NUM (33061144)
#define NV_BUILD_BRANCH_VERSION "r535_00-254"
#define NV_BUILD_CHANGELIST_NUM (33134228)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "536.67"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33061144)
#define NV_BUILD_NAME "536.92"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33134228)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.86.10"
#define NV_VERSION_STRING "535.98"
#else

View File

@ -43,7 +43,7 @@
* This structure represents vGPU host device KERNEL object allocation parameters.
* dbdf -> domain (31:16), bus (15:8), device (7:3), function (2:0)
* gfid -> Used only when SRIOV is enabled otherwise set to 0.
* swizzId [IN/OUT] -> Used only when MIG mode is enabled otherwise set
* swizzId [IN] -> Used only when MIG mode is enabled otherwise set
* to NV2080_CTRL_GPU_PARTITION_ID_INVALID.
* numChannels -> Used only when SRIOV is enabled. Must be a power of 2.
* bDisableDefaultSmcExecPartRestore - If set to true, SMC default execution partition

View File

@ -837,6 +837,7 @@ typedef struct NV2080_CTRL_NVLINK_INJECT_ERROR_PARAMS {
#define NV2080_CTRL_NVLINK_UNIT_TLC_TX_0 0x05U
#define NV2080_CTRL_NVLINK_UNIT_MIF_RX_0 0x06U
#define NV2080_CTRL_NVLINK_UNIT_MIF_TX_0 0x07U
#define NV2080_CTRL_NVLINK_UNIT_MINION 0x08U
/*
* NV2080_CTRL_CMD_NVLINK_GET_ERROR_RECOVERIES

View File

@ -105,6 +105,7 @@
#define NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_NONE 0
#define NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_AMD_SEV 1
#define NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_INTEL_TDX 2
#define NV_CONF_COMPUTE_SYSTEM_GPUS_CAPABILITY_NONE 0
#define NV_CONF_COMPUTE_SYSTEM_GPUS_CAPABILITY_APM 1

View File

@ -15,6 +15,7 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
{ 0x20B5, 0x1642, 0x20B5, 0x1533 },
{ 0x20B8, 0x1581, 0x20B5, 0x1533 },
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
{ 0x2330, 0x16C0, 0x2330, 0x16C1 },
};
for (NvU32 i = 0; i < (sizeof(vgpu_aliases) / sizeof(struct vgpu_alias_details)); ++i) {
@ -28,3 +29,5 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
return;
}
#endif //__VGPU_ALIAS_PGPU_LIST__

View File

@ -1394,6 +1394,12 @@ typedef struct _NVHwModeTimingsEvo {
NVHwModeViewPortEvo viewPort;
} NVHwModeTimingsEvo;
static inline NvBool nvIsAdaptiveSyncDpyVrrType(enum NvKmsDpyVRRType type)
{
return ((type == NVKMS_DPY_VRR_TYPE_ADAPTIVE_SYNC_DEFAULTLISTED) ||
(type == NVKMS_DPY_VRR_TYPE_ADAPTIVE_SYNC_NON_DEFAULTLISTED));
}
static inline NvU64 nvEvoFrametimeUsFromTimings(const NVHwModeTimingsEvo *pTimings)
{
NvU64 pixelsPerFrame = pTimings->rasterSize.x * pTimings->rasterSize.y;

View File

@ -232,6 +232,10 @@ static void SetDPMSATiming(const NVDispEvoRec *pDispEvo,
NV0073_CTRL_CMD_DP_SET_MSA_PROPERTIES_PARAMS *msaParams,
const NVHwModeTimingsEvo *pTimings)
{
NV0073_CTRL_DP_MSA_PROPERTIES_MASK *featureMask = &msaParams->featureMask;
NV0073_CTRL_DP_MSA_PROPERTIES_VALUES *featureValues =
&msaParams->featureValues;
nvkms_memset(msaParams, 0, sizeof(*msaParams));
/*
@ -244,12 +248,16 @@ static void SetDPMSATiming(const NVDispEvoRec *pDispEvo,
msaParams->subDeviceInstance = pDispEvo->displayOwner;
msaParams->displayId = displayId;
if ((pTimings->yuv420Mode == NV_YUV420_MODE_SW) && displayId != 0) {
NV0073_CTRL_DP_MSA_PROPERTIES_MASK *featureMask = &msaParams->featureMask;
NV0073_CTRL_DP_MSA_PROPERTIES_VALUES *featureValues = &msaParams->featureValues;
if ((displayId == 0x0) ||
((pTimings->yuv420Mode != NV_YUV420_MODE_SW) &&
!nvIsAdaptiveSyncDpyVrrType(pTimings->vrr.type))) {
return;
}
msaParams->bEnableMSA = 1;
msaParams->bCacheMsaOverrideForNextModeset = 1;
if (pTimings->yuv420Mode == NV_YUV420_MODE_SW) {
featureMask->bRasterTotalHorizontal = true;
featureMask->bActiveStartHorizontal = true;
featureMask->bSurfaceTotalHorizontal = true;
@ -259,6 +267,15 @@ static void SetDPMSATiming(const NVDispEvoRec *pDispEvo,
featureValues->surfaceTotalHorizontal = 2 * nvEvoVisibleWidth(pTimings);
featureValues->syncWidthHorizontal = 2 * (pTimings->rasterSyncEnd.x + 1);
}
/*
* In case of Adaptive-Sync VRR, override VTotal field of MSA (Main Stream
* Attributes) to work around bug 4164132.
*/
if (nvIsAdaptiveSyncDpyVrrType(pTimings->vrr.type)) {
featureMask->bRasterTotalVertical = true;
featureValues->rasterTotalVertical = pTimings->rasterSize.y;
}
}
static void InitDpModesetParams(

View File

@ -2117,7 +2117,10 @@ IsProposedModeSetStateOneApiHeadIncompatible(
&pDispEvo->apiHeadState[apiHead];
const NVDpyEvoRec *pDpyEvo =
nvGetOneArbitraryDpyEvo(pApiHeadState->activeDpys, pDispEvo);
const NvBool bCurrent2Heads1Or =
(nvPopCount32(pApiHeadState->hwHeadsMask) > 1);
const NvBool bProposed2Heads1Or =
(nvPopCount32(pProposedApiHead->hwHeadsMask) > 1);
nvAssert(pDpyEvo != NULL);
/*
@ -2127,11 +2130,22 @@ IsProposedModeSetStateOneApiHeadIncompatible(
*
* Consider this api-head incompatible if there is change in the api-head
* to hardware-head(s) mapping.
*
* Mark api-head incompatible if its current or proposed modeset state is
* using 2Heads1OR configuration:
* Even if there is no change in the hardware heads or modetimings, it is
* not possible to do modeset on the active 2Heads1OR api-head without
* shutting it down first. The modeset code path is not ready to handle a
* glitchless 2Heads1OR modeset; for example, NV0073_CTRL_CMD_DFP_ASSIGN_SOR
* does not handle the assignment of secondary SORs if the display is already
* active and returns incorrect information, which leads to a segfault in
* NVKMS.
*/
if (nvConnectorUsesDPLib(pDpyEvo->pConnectorEvo) ||
((pProposedApiHead->hwHeadsMask != 0x0) &&
(pProposedApiHead->hwHeadsMask != pApiHeadState->hwHeadsMask))) {
(pProposedApiHead->hwHeadsMask != pApiHeadState->hwHeadsMask)) ||
bCurrent2Heads1Or || bProposed2Heads1Or) {
return TRUE;
}

View File

@ -183,13 +183,6 @@ static NvBool DpyIsGsync(const NVDpyEvoRec *pDpyEvo)
return pDpyEvo->vrr.type == NVKMS_DPY_VRR_TYPE_GSYNC;
}
static NvBool IsAdaptiveSyncDpyVrrType(enum NvKmsDpyVRRType type)
{
return ((type == NVKMS_DPY_VRR_TYPE_ADAPTIVE_SYNC_DEFAULTLISTED) ||
(type == NVKMS_DPY_VRR_TYPE_ADAPTIVE_SYNC_NON_DEFAULTLISTED));
}
static NvBool AnyEnabledAdaptiveSyncDpys(const NVDevEvoRec *pDevEvo)
{
NVDispEvoPtr pDispEvo;
@ -202,7 +195,7 @@ static NvBool AnyEnabledAdaptiveSyncDpys(const NVDevEvoRec *pDevEvo)
const NVDispHeadStateEvoRec *pHeadState =
&pDispEvo->headState[head];
if (IsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
if (nvIsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
return TRUE;
}
}
@ -286,7 +279,7 @@ void nvAdjustHwModeTimingsForVrrEvo(NVHwModeTimingsEvoPtr pTimings,
// Allow overriding the EDID min refresh rate on Adaptive-Sync
// displays.
if (IsAdaptiveSyncDpyVrrType(vrrType) && vrrOverrideMinRefreshRate) {
if (nvIsAdaptiveSyncDpyVrrType(vrrType) && vrrOverrideMinRefreshRate) {
NvU32 minMinRefreshRate, maxMinRefreshRate;
NvU32 clampedMinRefreshRate;
@ -737,7 +730,7 @@ void nvDisableVrr(NVDevEvoPtr pDevEvo)
NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head];
if ((pHeadState->pConnectorEvo != NULL) &&
IsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
nvIsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
if (nvConnectorUsesDPLib(pHeadState->pConnectorEvo)) {
nvDPLibSetAdaptiveSync(pDispEvo, head, FALSE);
} else {
@ -799,7 +792,7 @@ void nvGetDpyMinRefreshRateValidValues(
{
NvU32 edidMinRefreshRate;
if (IsAdaptiveSyncDpyVrrType(vrrType)) {
if (nvIsAdaptiveSyncDpyVrrType(vrrType)) {
/*
* Adaptive-Sync monitors must always define a nonzero minimum refresh
* rate in the EDID, and a modeset may override this within a range
@ -860,7 +853,7 @@ void nvEnableVrr(NVDevEvoPtr pDevEvo)
NVDispHeadStateEvoRec *pHeadState = &pDispEvo->headState[head];
if ((pHeadState->pConnectorEvo != NULL) &&
IsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
nvIsAdaptiveSyncDpyVrrType(pHeadState->timings.vrr.type)) {
if (nvConnectorUsesDPLib(pHeadState->pConnectorEvo)) {
nvDPLibSetAdaptiveSync(pDispEvo, head, TRUE);
} else {

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2008-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2008-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -262,6 +262,7 @@ typedef NvU8 FLCN_STATUS;
// Warnings.
#define FLCN_WARN_NOTHING_TO_DO (0xD0U)
#define FLCN_WARN_NOT_QUERIED (0xD1U)
#define FLCN_WARN_DATA_NOT_AVAILABLE (0xD2U)
// Queue handling Errors
#define FLCN_ERR_QUEUE_MGMT_INVALID_UNIT_ID (0xE0U)

View File

@ -84,6 +84,7 @@ CHIPSET_SETUP_FUNC(QEMU_setupFunc)
CHIPSET_SETUP_FUNC(Ampere_eMag_setupFunc)
CHIPSET_SETUP_FUNC(Huawei_Kunpeng920_setupFunc)
CHIPSET_SETUP_FUNC(Mellanox_BlueField_setupFunc)
CHIPSET_SETUP_FUNC(Mellanox_BlueField3_setupFunc)
CHIPSET_SETUP_FUNC(Amazon_Gravitron2_setupFunc)
CHIPSET_SETUP_FUNC(Fujitsu_A64FX_setupFunc)
CHIPSET_SETUP_FUNC(Ampere_Altra_setupFunc)
@ -244,7 +245,7 @@ CSINFO chipsetInfo[] =
{PCI_VENDOR_ID_MELLANOX, 0xA2D0, CS_MELLANOX_BLUEFIELD, "Mellanox BlueField", Mellanox_BlueField_setupFunc},
{PCI_VENDOR_ID_MELLANOX, 0xA2D4, CS_MELLANOX_BLUEFIELD2, "Mellanox BlueField 2", NULL},
{PCI_VENDOR_ID_MELLANOX, 0xA2D5, CS_MELLANOX_BLUEFIELD2, "Mellanox BlueField 2 Crypto disabled", NULL},
{PCI_VENDOR_ID_MELLANOX, 0xA2DB, CS_MELLANOX_BLUEFIELD3, "Mellanox BlueField 3", NULL},
{PCI_VENDOR_ID_MELLANOX, 0xA2DB, CS_MELLANOX_BLUEFIELD3, "Mellanox BlueField 3", Mellanox_BlueField3_setupFunc},
{PCI_VENDOR_ID_AMAZON, 0x0200, CS_AMAZON_GRAVITRON2, "Amazon Gravitron2", Amazon_Gravitron2_setupFunc},
{PCI_VENDOR_ID_FUJITSU, 0x1952, CS_FUJITSU_A64FX, "Fujitsu A64FX", Fujitsu_A64FX_setupFunc},
{PCI_VENDOR_ID_CADENCE, 0xDC01, CS_PHYTIUM_S2500, "Phytium S2500", NULL},

View File

@ -150,7 +150,8 @@
#define LSF_FALCON_ID_NVJPG_RISCV_EB (23U)
#define LSF_FALCON_ID_OFA_RISCV_EB (24U)
#define LSF_FALCON_ID_NVENC_RISCV_EB (25U)
#define LSF_FALCON_ID_END (26U)
#define LSF_FALCON_ID_PMU_RISCV_EB (26U)
#define LSF_FALCON_ID_END (27U)
#define LSF_FALCON_ID_INVALID (0xFFFFFFFFU)
@ -176,13 +177,13 @@
#define LSF_FALCON_ID_END_18 (18U)
#define LSF_FALCON_ID_END_21 (21U)
#define LSF_FALCON_INSTANCE_DEFAULT_0 (0x0)
#define LSF_FALCON_INSTANCE_COUNT_DEFAULT_1 (0x1)
#define LSF_FALCON_INSTANCE_DEFAULT_0 (0x0U)
#define LSF_FALCON_INSTANCE_COUNT_DEFAULT_1 (0x1U)
// Currently max supported instance is 8 for FECS/GPCCS SMC
#define LSF_FALCON_INSTANCE_FECS_GPCCS_MAX (0x8)
#define LSF_FALCON_INSTANCE_FECS_GPCCS_MAX (0x8U)
#define LSF_FALCON_INSTANCE_INVALID (0xFFFFFFFFU)
#define LSF_FALCON_INDEX_MASK_DEFAULT_0 (0x0)
#define LSF_FALCON_INDEX_MASK_DEFAULT_0 (0x0U)
/*!
* Checks if the LSF Falcon specified by falconId uses a falconInstance to uniquely identify itself.

View File

@ -214,8 +214,8 @@ NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;
extern NvU8 os_page_shift;
extern NvU32 os_sev_status;
extern NvBool os_sev_enabled;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
/*

View File

@ -2712,8 +2712,8 @@ void osInitSystemStaticConfig(SYS_STATIC_CONFIG *pConfig)
{
pConfig->bIsNotebook = rm_is_system_notebook();
pConfig->osType = nv_get_os_type();
pConfig->osSevStatus = os_sev_status;
pConfig->bOsSevEnabled = os_sev_enabled;
pConfig->bOsCCEnabled = os_cc_enabled;
pConfig->bOsCCTdxEnabled = os_cc_tdx_enabled;
}
NvU32 osApiLockAcquireConfigureFlags(NvU32 flags)
@ -4161,24 +4161,34 @@ osAllocPagesNode
return status;
}
NV_STATUS
void
osAllocAcquirePage
(
NvU64 pAddress
NvU64 pAddress,
NvU32 pageCount
)
{
os_get_page(pAddress);
return NV_OK;
NvU32 i;
for (i = 0; i < pageCount; i++)
{
os_get_page(pAddress + (i << os_page_shift));
}
}
NV_STATUS
void
osAllocReleasePage
(
NvU64 pAddress
NvU64 pAddress,
NvU32 pageCount
)
{
os_put_page(pAddress);
return NV_OK;
NvU32 i;
for (i = 0; i < pageCount; i++)
{
os_put_page(pAddress + (i << os_page_shift));
}
}
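// Illustrative note, not part of this change: the loops above step through
// the range one OS page at a time. With the usual os_page_shift of 12 (4 KiB
// pages), page i of the allocation lives at pAddress + (i << 12), i.e.
// pAddress, pAddress + 0x1000, pAddress + 0x2000, and so on.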
/*
@ -4262,6 +4272,12 @@ osGetPageSize(void)
return os_page_size;
}
NvU8
osGetPageShift(void)
{
return os_page_shift;
}
/*

View File

@ -700,6 +700,15 @@ NV_STATUS rpcDumpProtobufComponent_STUB(
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:ECC_NOTIFIER_WRITE_ACK - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcEccNotifierWriteAck_STUB(
POBJGPU pGpu,
POBJRPC pRpc
)
{
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:ALLOC_MEMORY - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcAllocMemory_STUB(
POBJGPU pGpu,

View File

@ -82,6 +82,10 @@ static NV_STATUS __nvoc_thunk_KernelMemorySystem_engstateStatePostLoad(OBJGPU *p
return kmemsysStatePostLoad(pGpu, (struct KernelMemorySystem *)(((unsigned char *)pKernelMemorySystem) - __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset), flags);
}
static NV_STATUS __nvoc_thunk_KernelMemorySystem_engstateStatePreUnload(OBJGPU *pGpu, struct OBJENGSTATE *pKernelMemorySystem, NvU32 flags) {
return kmemsysStatePreUnload(pGpu, (struct KernelMemorySystem *)(((unsigned char *)pKernelMemorySystem) - __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset), flags);
}
static void __nvoc_thunk_KernelMemorySystem_engstateStateDestroy(OBJGPU *pGpu, struct OBJENGSTATE *pKernelMemorySystem) {
kmemsysStateDestroy(pGpu, (struct KernelMemorySystem *)(((unsigned char *)pKernelMemorySystem) - __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset));
}
@ -98,10 +102,6 @@ static NV_STATUS __nvoc_thunk_OBJENGSTATE_kmemsysStatePostUnload(POBJGPU pGpu, s
return engstateStatePostUnload(pGpu, (struct OBJENGSTATE *)(((unsigned char *)pEngstate) + __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset), arg0);
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_kmemsysStatePreUnload(POBJGPU pGpu, struct KernelMemorySystem *pEngstate, NvU32 arg0) {
return engstateStatePreUnload(pGpu, (struct OBJENGSTATE *)(((unsigned char *)pEngstate) + __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset), arg0);
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_kmemsysStateInitUnlocked(POBJGPU pGpu, struct KernelMemorySystem *pEngstate) {
return engstateStateInitUnlocked(pGpu, (struct OBJENGSTATE *)(((unsigned char *)pEngstate) + __nvoc_rtti_KernelMemorySystem_OBJENGSTATE.offset));
}
@ -217,6 +217,8 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
pThis->__kmemsysStatePostLoad__ = &kmemsysStatePostLoad_IMPL;
pThis->__kmemsysStatePreUnload__ = &kmemsysStatePreUnload_IMPL;
pThis->__kmemsysStateDestroy__ = &kmemsysStateDestroy_IMPL;
// Hal function -- kmemsysGetFbNumaInfo
@ -413,6 +415,16 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
pThis->__kmemsysSetupAllAtsPeers__ = &kmemsysSetupAllAtsPeers_GV100;
}
// Hal function -- kmemsysRemoveAllAtsPeers
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */
{
pThis->__kmemsysRemoveAllAtsPeers__ = &kmemsysRemoveAllAtsPeers_b3696a;
}
else
{
pThis->__kmemsysRemoveAllAtsPeers__ = &kmemsysRemoveAllAtsPeers_GV100;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelMemorySystem_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelMemorySystem_engstateStateInitLocked;
@ -421,6 +433,8 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePostLoad__ = &__nvoc_thunk_KernelMemorySystem_engstateStatePostLoad;
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreUnload__ = &__nvoc_thunk_KernelMemorySystem_engstateStatePreUnload;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateDestroy__ = &__nvoc_thunk_KernelMemorySystem_engstateStateDestroy;
pThis->__kmemsysStateLoad__ = &__nvoc_thunk_OBJENGSTATE_kmemsysStateLoad;
@ -429,8 +443,6 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
pThis->__kmemsysStatePostUnload__ = &__nvoc_thunk_OBJENGSTATE_kmemsysStatePostUnload;
pThis->__kmemsysStatePreUnload__ = &__nvoc_thunk_OBJENGSTATE_kmemsysStatePreUnload;
pThis->__kmemsysStateInitUnlocked__ = &__nvoc_thunk_OBJENGSTATE_kmemsysStateInitUnlocked;
pThis->__kmemsysInitMissing__ = &__nvoc_thunk_OBJENGSTATE_kmemsysInitMissing;

View File

@ -201,6 +201,7 @@ struct KernelMemorySystem {
NV_STATUS (*__kmemsysStateInitLocked__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysStatePreLoad__)(OBJGPU *, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStatePostLoad__)(OBJGPU *, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStatePreUnload__)(OBJGPU *, struct KernelMemorySystem *, NvU32);
void (*__kmemsysStateDestroy__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysGetFbNumaInfo__)(OBJGPU *, struct KernelMemorySystem *, NvU64 *, NvS32 *);
NV_STATUS (*__kmemsysReadUsableFbSize__)(OBJGPU *, struct KernelMemorySystem *, NvU64 *);
@ -220,10 +221,10 @@ struct KernelMemorySystem {
void (*__kmemsysNumaRemoveMemory__)(OBJGPU *, struct KernelMemorySystem *, NvU32);
void (*__kmemsysNumaRemoveAllMemory__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysSetupAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
void (*__kmemsysRemoveAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysStateLoad__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStateUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStatePostUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStatePreUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
NV_STATUS (*__kmemsysStateInitUnlocked__)(POBJGPU, struct KernelMemorySystem *);
void (*__kmemsysInitMissing__)(POBJGPU, struct KernelMemorySystem *);
NV_STATUS (*__kmemsysStatePreInitLocked__)(POBJGPU, struct KernelMemorySystem *);
@ -282,6 +283,7 @@ NV_STATUS __nvoc_objCreate_KernelMemorySystem(KernelMemorySystem**, Dynamic*, Nv
#define kmemsysStateInitLocked(pGpu, pKernelMemorySystem) kmemsysStateInitLocked_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysStatePreLoad(pGpu, pKernelMemorySystem, flags) kmemsysStatePreLoad_DISPATCH(pGpu, pKernelMemorySystem, flags)
#define kmemsysStatePostLoad(pGpu, pKernelMemorySystem, flags) kmemsysStatePostLoad_DISPATCH(pGpu, pKernelMemorySystem, flags)
#define kmemsysStatePreUnload(pGpu, pKernelMemorySystem, flags) kmemsysStatePreUnload_DISPATCH(pGpu, pKernelMemorySystem, flags)
#define kmemsysStateDestroy(pGpu, pKernelMemorySystem) kmemsysStateDestroy_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysGetFbNumaInfo(pGpu, pKernelMemorySystem, physAddr, numaNodeId) kmemsysGetFbNumaInfo_DISPATCH(pGpu, pKernelMemorySystem, physAddr, numaNodeId)
#define kmemsysGetFbNumaInfo_HAL(pGpu, pKernelMemorySystem, physAddr, numaNodeId) kmemsysGetFbNumaInfo_DISPATCH(pGpu, pKernelMemorySystem, physAddr, numaNodeId)
@ -319,10 +321,11 @@ NV_STATUS __nvoc_objCreate_KernelMemorySystem(KernelMemorySystem**, Dynamic*, Nv
#define kmemsysNumaRemoveAllMemory_HAL(pGpu, pKernelMemorySystem) kmemsysNumaRemoveAllMemory_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysSetupAllAtsPeers(pGpu, pKernelMemorySystem) kmemsysSetupAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysSetupAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysRemoveAllAtsPeers(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
#define kmemsysStateLoad(pGpu, pEngstate, arg0) kmemsysStateLoad_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStateUnload(pGpu, pEngstate, arg0) kmemsysStateUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStatePostUnload(pGpu, pEngstate, arg0) kmemsysStatePostUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStatePreUnload(pGpu, pEngstate, arg0) kmemsysStatePreUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kmemsysStateInitUnlocked(pGpu, pEngstate) kmemsysStateInitUnlocked_DISPATCH(pGpu, pEngstate)
#define kmemsysInitMissing(pGpu, pEngstate) kmemsysInitMissing_DISPATCH(pGpu, pEngstate)
#define kmemsysStatePreInitLocked(pGpu, pEngstate) kmemsysStatePreInitLocked_DISPATCH(pGpu, pEngstate)
@ -536,6 +539,12 @@ static inline NV_STATUS kmemsysStatePostLoad_DISPATCH(OBJGPU *pGpu, struct Kerne
return pKernelMemorySystem->__kmemsysStatePostLoad__(pGpu, pKernelMemorySystem, flags);
}
NV_STATUS kmemsysStatePreUnload_IMPL(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem, NvU32 flags);
static inline NV_STATUS kmemsysStatePreUnload_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem, NvU32 flags) {
return pKernelMemorySystem->__kmemsysStatePreUnload__(pGpu, pKernelMemorySystem, flags);
}
void kmemsysStateDestroy_IMPL(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
static inline void kmemsysStateDestroy_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
@ -714,6 +723,16 @@ static inline NV_STATUS kmemsysSetupAllAtsPeers_DISPATCH(OBJGPU *pGpu, struct Ke
return pKernelMemorySystem->__kmemsysSetupAllAtsPeers__(pGpu, pKernelMemorySystem);
}
static inline void kmemsysRemoveAllAtsPeers_b3696a(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
return;
}
void kmemsysRemoveAllAtsPeers_GV100(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
static inline void kmemsysRemoveAllAtsPeers_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem) {
pKernelMemorySystem->__kmemsysRemoveAllAtsPeers__(pGpu, pKernelMemorySystem);
}
static inline NV_STATUS kmemsysStateLoad_DISPATCH(POBJGPU pGpu, struct KernelMemorySystem *pEngstate, NvU32 arg0) {
return pEngstate->__kmemsysStateLoad__(pGpu, pEngstate, arg0);
}
@ -726,10 +745,6 @@ static inline NV_STATUS kmemsysStatePostUnload_DISPATCH(POBJGPU pGpu, struct Ker
return pEngstate->__kmemsysStatePostUnload__(pGpu, pEngstate, arg0);
}
static inline NV_STATUS kmemsysStatePreUnload_DISPATCH(POBJGPU pGpu, struct KernelMemorySystem *pEngstate, NvU32 arg0) {
return pEngstate->__kmemsysStatePreUnload__(pGpu, pEngstate, arg0);
}
static inline NV_STATUS kmemsysStateInitUnlocked_DISPATCH(POBJGPU pGpu, struct KernelMemorySystem *pEngstate) {
return pEngstate->__kmemsysStateInitUnlocked__(pGpu, pEngstate);
}

View File

@ -980,6 +980,10 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x26B1, 0x16a1, 0x103c, "NVIDIA RTX 6000 Ada Generation" },
{ 0x26B1, 0x16a1, 0x10de, "NVIDIA RTX 6000 Ada Generation" },
{ 0x26B1, 0x16a1, 0x17aa, "NVIDIA RTX 6000 Ada Generation" },
{ 0x26B2, 0x17fa, 0x1028, "NVIDIA RTX 5000 Ada Generation" },
{ 0x26B2, 0x17fa, 0x103c, "NVIDIA RTX 5000 Ada Generation" },
{ 0x26B2, 0x17fa, 0x10de, "NVIDIA RTX 5000 Ada Generation" },
{ 0x26B2, 0x17fa, 0x17aa, "NVIDIA RTX 5000 Ada Generation" },
{ 0x26B5, 0x169d, 0x10de, "NVIDIA L40" },
{ 0x26B5, 0x17da, 0x10de, "NVIDIA L40" },
{ 0x2704, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080" },
@ -993,6 +997,10 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x27B0, 0x16fa, 0x103c, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B0, 0x16fa, 0x10de, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B0, 0x16fa, 0x17aa, "NVIDIA RTX 4000 SFF Ada Generation" },
{ 0x27B2, 0x181b, 0x1028, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B2, 0x181b, 0x103c, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B2, 0x181b, 0x10de, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B2, 0x181b, 0x17aa, "NVIDIA RTX 4000 Ada Generation" },
{ 0x27B8, 0x16ca, 0x10de, "NVIDIA L4" },
{ 0x27B8, 0x16ee, 0x10de, "NVIDIA L4" },
{ 0x27BA, 0x0000, 0x0000, "NVIDIA RTX 4000 Ada Generation Laptop GPU" },
@ -1485,17 +1493,6 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B7, 0x1590, 0x10DE, "NVIDIA A30-24C" },
{ 0x20B7, 0x1610, 0x10DE, "NVIDIA A30-1-6CME" },
{ 0x20B7, 0x183c, 0x10DE, "NVIDIA A30-2-12CME" },
{ 0x20BF, 0x4450, 0x10DE, "GRID A100B-4C" },
{ 0x20BF, 0x4451, 0x10DE, "GRID A100B-5C" },
{ 0x20BF, 0x4452, 0x10DE, "GRID A100B-8C" },
{ 0x20BF, 0x4453, 0x10DE, "GRID A100B-10C" },
{ 0x20BF, 0x4454, 0x10DE, "GRID A100B-20C" },
{ 0x20BF, 0x4455, 0x10DE, "GRID A100B-40C" },
{ 0x20BF, 0x5560, 0x10DE, "GRID A100B-1-5C" },
{ 0x20BF, 0x5561, 0x10DE, "GRID A100B-2-10C" },
{ 0x20BF, 0x5562, 0x10DE, "GRID A100B-3-20C" },
{ 0x20BF, 0x5563, 0x10DE, "GRID A100B-4-20C" },
{ 0x20BF, 0x5564, 0x10DE, "GRID A100B-7-40C" },
{ 0x20F1, 0x1493, 0x10DE, "GRID A100-1-5C" },
{ 0x20F1, 0x1494, 0x10DE, "GRID A100-2-10C" },
{ 0x20F1, 0x1495, 0x10DE, "GRID A100-3-20C" },
@ -1775,6 +1772,21 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2322, 0x17ee, 0x10DE, "NVIDIA H800-40C" },
{ 0x2322, 0x17ef, 0x10DE, "NVIDIA H800-80C" },
{ 0x2322, 0x1845, 0x10DE, "NVIDIA H800-1-20C" },
{ 0x2330, 0x187a, 0x10DE, "NVIDIA H100XM-1-10CME" },
{ 0x2330, 0x187b, 0x10DE, "NVIDIA H100XM-1-10C" },
{ 0x2330, 0x187c, 0x10DE, "NVIDIA H100XM-1-20C" },
{ 0x2330, 0x187d, 0x10DE, "NVIDIA H100XM-2-20C" },
{ 0x2330, 0x187e, 0x10DE, "NVIDIA H100XM-3-40C" },
{ 0x2330, 0x187f, 0x10DE, "NVIDIA H100XM-4-40C" },
{ 0x2330, 0x1880, 0x10DE, "NVIDIA H100XM-7-80C" },
{ 0x2330, 0x1881, 0x10DE, "NVIDIA H100XM-4C" },
{ 0x2330, 0x1882, 0x10DE, "NVIDIA H100XM-5C" },
{ 0x2330, 0x1883, 0x10DE, "NVIDIA H100XM-8C" },
{ 0x2330, 0x1884, 0x10DE, "NVIDIA H100XM-10C" },
{ 0x2330, 0x1885, 0x10DE, "NVIDIA H100XM-16C" },
{ 0x2330, 0x1886, 0x10DE, "NVIDIA H100XM-20C" },
{ 0x2330, 0x1887, 0x10DE, "NVIDIA H100XM-40C" },
{ 0x2330, 0x1888, 0x10DE, "NVIDIA H100XM-80C" },
{ 0x2331, 0x16d3, 0x10DE, "NVIDIA H100-1-10C" },
{ 0x2331, 0x16d4, 0x10DE, "NVIDIA H100-2-20C" },
{ 0x2331, 0x16d5, 0x10DE, "NVIDIA H100-3-40C" },

View File

@ -294,11 +294,12 @@ typedef void OSFlushCpuWriteCombineBuffer(void);
typedef NV_STATUS OSNumaMemblockSize(NvU64 *);
typedef NvBool OSNumaOnliningEnabled(OS_GPU_INFO *);
typedef NV_STATUS OSAllocPagesNode(NvS32, NvLength, NvU32, NvU64 *);
typedef NV_STATUS OSAllocAcquirePage(NvU64);
typedef NV_STATUS OSAllocReleasePage(NvU64);
typedef void OSAllocAcquirePage(NvU64, NvU32);
typedef void OSAllocReleasePage(NvU64, NvU32);
typedef NvU32 OSGetPageRefcount(NvU64);
typedef NvU32 OSCountTailPages(NvU64);
typedef NvU64 OSGetPageSize(void);
typedef NvU8 OSGetPageShift(void);
// We use osAcquireRmSema to catch "unported" sema code to new lock model
@ -1318,6 +1319,7 @@ OSAllocPagesInternal osAllocPagesInternal;
OSFreePagesInternal osFreePagesInternal;
OSGetPageSize osGetPageSize;
OSGetPageShift osGetPageShift;
OSNumaMemblockSize osNumaMemblockSize;
OSNumaOnliningEnabled osNumaOnliningEnabled;
OSAllocPagesNode osAllocPagesNode;

View File

@ -1382,6 +1382,21 @@ static vmiopd_mdesc_t vmiopd_mdesc_t_rpc_rmfs_test_v15_00 = {
};
#endif
#ifndef SKIP_PRINT_rpc_ecc_notifier_write_ack_v23_05
static vmiopd_fdesc_t vmiopd_fdesc_t_rpc_ecc_notifier_write_ack_v23_05[] = {
{
.vtype = vt_end
}
};
static vmiopd_mdesc_t vmiopd_mdesc_t_rpc_ecc_notifier_write_ack_v23_05 = {
#if (defined(DEBUG) || defined(DEVELOP))
.name = "rpc_ecc_notifier_write_ack",
#endif
.fdesc = vmiopd_fdesc_t_rpc_ecc_notifier_write_ack_v23_05
};
#endif
#ifndef SKIP_PRINT_rpc_gsp_set_system_info_v17_00
static vmiopd_fdesc_t vmiopd_fdesc_t_rpc_gsp_set_system_info_v17_00[] = {
{
@ -2872,6 +2887,13 @@ vmiopd_mdesc_t *rpcdebugRmfsTest_v15_00(void)
}
#endif
#ifndef SKIP_PRINT_rpc_ecc_notifier_write_ack_v23_05
vmiopd_mdesc_t *rpcdebugEccNotifierWriteAck_v23_05(void)
{
return &vmiopd_mdesc_t_rpc_ecc_notifier_write_ack_v23_05;
}
#endif
#ifndef SKIP_PRINT_rpc_gsp_set_system_info_v17_00
vmiopd_mdesc_t *rpcdebugGspSetSystemInfo_v17_00(void)
{

View File

@ -66,6 +66,7 @@ typedef struct RPC_OBJ_IFACES {
typedef NV_STATUS RpcVgpuPfRegRead32(POBJGPU, POBJRPC, NvU64, NvU32*, NvU32);
typedef NV_STATUS RpcDumpProtobufComponent(POBJGPU, POBJRPC, PRB_ENCODER *pPrbEnc, NVD_STATE *pNvDumpState, NVDUMP_COMPONENT component);
typedef NV_STATUS RpcEccNotifierWriteAck(POBJGPU, POBJRPC);
typedef NV_STATUS RpcAllocMemory(POBJGPU, POBJRPC, NvHandle, NvHandle, NvHandle,
NvU32, NvU32, MEMORY_DESCRIPTOR*);
typedef NV_STATUS RpcGpuExecRegOps(POBJGPU, POBJRPC, NvHandle, NvHandle,
@ -100,6 +101,7 @@ typedef NV_STATUS Rpc_iGrp_ipVersions_getInfo(IGRP_IP_VERSIONS_TABLE_INFO *
typedef struct RPC_HAL_IFACES {
RpcVgpuPfRegRead32 *rpcVgpuPfRegRead32; /* Read reg value from plugin */
RpcDumpProtobufComponent *rpcDumpProtobufComponent; /* Dump a GSP component into the protobuf. */
RpcEccNotifierWriteAck *rpcEccNotifierWriteAck; /* ECC_NOTIFIER_WRITE_ACK */
RpcAllocMemory *rpcAllocMemory; /* ALLOC_MEMORY */
RpcGpuExecRegOps *rpcGpuExecRegOps; /* GPU_EXEC_REG_OPS */
RpcRmfsInit *rpcRmfsInit; /* Resman File Streaming Init */
@ -130,6 +132,8 @@ typedef struct RPC_HAL_IFACES {
(_pRpc)->_hal.rpcVgpuPfRegRead32(_pGpu, _pRpc, _arg0, _pArg1, _arg2)
#define rpcDumpProtobufComponent_HAL(_pGpu, _pRpc, _pPrbEnc, _pNvDumpState, _component) \
(_pRpc)->_hal.rpcDumpProtobufComponent(_pGpu, _pRpc, _pPrbEnc, _pNvDumpState, _component)
#define rpcEccNotifierWriteAck_HAL(_pGpu, _pRpc) \
(_pRpc)->_hal.rpcEccNotifierWriteAck(_pGpu, _pRpc)
#define rpcAllocMemory_HAL(_pGpu, _pRpc, _arg0, _arg1, _arg2, _arg3, _arg4, _pArg5) \
(_pRpc)->_hal.rpcAllocMemory(_pGpu, _pRpc, _arg0, _arg1, _arg2, _arg3, _arg4, _pArg5)
#define rpcGpuExecRegOps_HAL(_pGpu, _pRpc, _arg0, _arg1, _pArg2, _pArg3) \

View File

@ -21,6 +21,10 @@ RpcVgpuPfRegRead32 rpcVgpuPfRegRead32_STUB; // TU10X, GA100, GA
RpcDumpProtobufComponent rpcDumpProtobufComponent_v18_12;
RpcDumpProtobufComponent rpcDumpProtobufComponent_STUB; // TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
// RPC:ECC_NOTIFIER_WRITE_ACK
RpcEccNotifierWriteAck rpcEccNotifierWriteAck_v23_05;
RpcEccNotifierWriteAck rpcEccNotifierWriteAck_STUB; // TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
// RPC:ALLOC_MEMORY
RpcAllocMemory rpcAllocMemory_v13_01;
RpcAllocMemory rpcAllocMemory_STUB; // TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
@ -2456,6 +2460,8 @@ static NV_STATUS rpc_iGrp_ipVersions_Wrapup(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
pRpcHal->rpcVgpuPfRegRead32 = rpcVgpuPfRegRead32_v15_00;
if (IsIPVersionInRange(pRpc, 0x18120000, 0xFFFFFFFF))
pRpcHal->rpcDumpProtobufComponent = rpcDumpProtobufComponent_v18_12;
if (IsIPVersionInRange(pRpc, 0x23050000, 0xFFFFFFFF))
pRpcHal->rpcEccNotifierWriteAck = rpcEccNotifierWriteAck_v23_05;
if (IsIPVersionInRange(pRpc, 0x13010000, 0xFFFFFFFF))
pRpcHal->rpcAllocMemory = rpcAllocMemory_v13_01;
if (IsIPVersionInRange(pRpc, 0x12010000, 0xFFFFFFFF))
@ -2526,6 +2532,7 @@ static NV_STATUS rpc_iGrp_ipVersions_Wrapup(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcVgpuPfRegRead32);
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcDumpProtobufComponent);
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcEccNotifierWriteAck);
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcAllocMemory);
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcGpuExecRegOps);
_RPC_HAL_VERIFY_INTERFACE(pRpcHal->rpcRmfsInit);
@ -3127,6 +3134,7 @@ static void rpcHalIfacesSetup_TU102(RPC_HAL_IFACES *pRpcHal)
{
rpcVgpuPfRegRead32_STUB, // rpcVgpuPfRegRead32
rpcDumpProtobufComponent_STUB, // rpcDumpProtobufComponent
rpcEccNotifierWriteAck_STUB, // rpcEccNotifierWriteAck
rpcAllocMemory_STUB, // rpcAllocMemory
rpcGpuExecRegOps_STUB, // rpcGpuExecRegOps
rpcRmfsInit_STUB, // rpcRmfsInit
@ -3200,6 +3208,7 @@ static void rpcHalIfacesSetup_GA100(RPC_HAL_IFACES *pRpcHal)
{
rpcVgpuPfRegRead32_STUB, // rpcVgpuPfRegRead32
rpcDumpProtobufComponent_STUB, // rpcDumpProtobufComponent
rpcEccNotifierWriteAck_STUB, // rpcEccNotifierWriteAck
rpcAllocMemory_STUB, // rpcAllocMemory
rpcGpuExecRegOps_STUB, // rpcGpuExecRegOps
rpcRmfsInit_STUB, // rpcRmfsInit
@ -3283,6 +3292,7 @@ static void rpcHalIfacesSetup_AD102(RPC_HAL_IFACES *pRpcHal)
{
rpcVgpuPfRegRead32_STUB, // rpcVgpuPfRegRead32
rpcDumpProtobufComponent_STUB, // rpcDumpProtobufComponent
rpcEccNotifierWriteAck_STUB, // rpcEccNotifierWriteAck
rpcAllocMemory_STUB, // rpcAllocMemory
rpcGpuExecRegOps_STUB, // rpcGpuExecRegOps
rpcRmfsInit_STUB, // rpcRmfsInit
@ -3356,6 +3366,7 @@ static void rpcHalIfacesSetup_GH100(RPC_HAL_IFACES *pRpcHal)
{
rpcVgpuPfRegRead32_STUB, // rpcVgpuPfRegRead32
rpcDumpProtobufComponent_STUB, // rpcDumpProtobufComponent
rpcEccNotifierWriteAck_STUB, // rpcEccNotifierWriteAck
rpcAllocMemory_STUB, // rpcAllocMemory
rpcGpuExecRegOps_STUB, // rpcGpuExecRegOps
rpcRmfsInit_STUB, // rpcRmfsInit

View File

@ -315,11 +315,11 @@ typedef struct SYS_STATIC_CONFIG
/*! Indicates the type of OS flavor */
NvU32 osType;
/*! AMD SEV (AMD's Secure Encrypted Virtualization) Status */
NvU32 osSevStatus;
/*! Indicates whether confidential compute OS support is enabled */
NvBool bOsCCEnabled;
/*! Indicates AMD SEV is enabled or not */
NvBool bOsSevEnabled;
/*! Indicates whether Intel TDX confidential compute OS support is enabled */
NvBool bOsCCTdxEnabled;
} SYS_STATIC_CONFIG;
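A minimal usage sketch for the renamed flags, assuming the sysGetStaticConfig() accessor used elsewhere in this change; the helper name and the OBJSYS parameter type are illustrative, not part of the diff:
// Hypothetical helper: reports whether the host OS runs with confidential
// compute enabled (AMD SEV or Intel TDX), and whether it is specifically TDX.
static NvBool sysIsCCOsEnabled(OBJSYS *pSys, NvBool *pbIsTdx)
{
    const SYS_STATIC_CONFIG *pCfg = sysGetStaticConfig(pSys);

    *pbIsTdx = pCfg->bOsCCTdxEnabled;   // only meaningful when bOsCCEnabled is NV_TRUE
    return pCfg->bOsCCEnabled;
}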
typedef enum

View File

@ -321,10 +321,6 @@ typedef struct UvmGpuChannelAllocParams_tag
// The next two fields store UVM_BUFFER_LOCATION values
NvU32 gpFifoLoc;
NvU32 gpPutLoc;
// Allocate the channel as secure. This flag should only be set when
// Confidential Compute is enabled.
NvBool secure;
} UvmGpuChannelAllocParams;
typedef struct UvmGpuPagingChannelAllocParams_tag
@ -368,9 +364,6 @@ typedef struct
// True if the CE can be used for P2P transactions
NvBool p2p:1;
// True if the CE supports encryption
NvBool secure:1;
// Mask of physical CEs assigned to this LCE
//
// The value returned by RM for this field may change when a GPU is

View File

@ -673,6 +673,17 @@ static inline void NV_RM_RPC_SIM_UPDATE_DISP_CHANNEL_INFO(OBJGPU *pGpu, ...) { r
status = NV_ERR_INSUFFICIENT_RESOURCES; \
} while (0)
#define NV_RM_RPC_ECC_NOTIFIER_WRITE_ACK(pGpu, status) \
do \
{ \
OBJRPC *pRpc = GPU_GET_RPC(pGpu); \
NV_ASSERT(pRpc != NULL); \
if ((status == NV_OK) && (pRpc != NULL)) \
status = rpcEccNotifierWriteAck_HAL(pGpu, pRpc); \
else if (pRpc == NULL) \
status = NV_ERR_INSUFFICIENT_RESOURCES; \
} while (0)
#define NV_RM_RPC_RMFS_INIT(pGpu, statusQueueMemDesc, status) do {} while(0)
#define NV_RM_RPC_RMFS_CLOSE_QUEUE(pGpu, status) do {} while(0)

View File

@ -207,6 +207,8 @@ enum {
X(RM, CTRL_SET_HS_CREDITS) // 198
X(RM, CTRL_PM_AREA_PC_SAMPLER) // 199
X(RM, INVALIDATE_TLB) // 200
X(RM, RESERVED_201) // 201
X(RM, ECC_NOTIFIER_WRITE_ACK) // 202
X(RM, NUM_FUNCTIONS) //END
#ifdef DEFINING_X_IN_RPC_GLOBAL_ENUMS_H
};

View File

@ -2003,3 +2003,27 @@ done:
}
return status;
}
/*
* Sends ack from CPU-RM to GSP-RM that ECC error
* notifier write has completed.
*/
NV_STATUS rpcEccNotifierWriteAck_v23_05
(
OBJGPU *pGpu,
OBJRPC *pRpc
)
{
NV_STATUS status = NV_ERR_NOT_SUPPORTED;
if (IS_GSP_CLIENT(pGpu))
{
status = rpcWriteCommonHeader(pGpu, pRpc, NV_VGPU_MSG_FUNCTION_ECC_NOTIFIER_WRITE_ACK, 0);
if (status != NV_OK)
return status;
status = _issueRpcAsync(pGpu, pRpc);
}
return status;
}
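A hypothetical CPU-RM call site for the new RPC, using the NV_RM_RPC_ECC_NOTIFIER_WRITE_ACK macro added earlier in this change; the wrapper function itself is illustrative only:
// Hypothetical wrapper: acknowledge an ECC error-notifier write back to GSP-RM.
// Per the macro definition, the RPC is only issued when pRpc is valid and
// 'status' is still NV_OK on entry.
static NV_STATUS eccAckNotifierWrite(OBJGPU *pGpu)
{
    NV_STATUS status = NV_OK;

    NV_RM_RPC_ECC_NOTIFIER_WRITE_ACK(pGpu, status);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "ECC notifier write ack RPC failed, status 0x%x\n", status);
    }

    return status;
}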

View File

@ -66,7 +66,7 @@ dispswConstruct_IMPL
if (!pKernelDisplay)
{
NV_PRINTF(LEVEL_ERROR, "Display is not enabled, can't create class\n");
NV_PRINTF(LEVEL_INFO, "Display is not enabled, can't create class\n");
return (NV_ERR_INVALID_ARGUMENT);
}

View File

@ -484,6 +484,7 @@ kbusStatePostLoad_GM107
}
}
// Call _kbusLinkP2P_GM107 only in case of Linked SLI and Unlinked SLI. Bug 4182245
if ((pKernelBif != NULL)
&&
// RM managed P2P or restoring the HW state for OS resume
@ -491,7 +492,9 @@ kbusStatePostLoad_GM107
(flags & GPU_STATE_FLAGS_PM_TRANSITION))
&&
(!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)))
!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED))
&&
!gpuIsSelfHosted(pGpu))
{
_kbusLinkP2P_GM107(pGpu, pKernelBus);
}
@ -538,6 +541,7 @@ kbusStateUnload_GM107
if (IS_VIRTUAL(pGpu) && !(flags & GPU_STATE_FLAGS_PRESERVING))
return NV_OK;
// Call kbusUnlinkP2P_HAL only in case of Linked SLI and Unlinked SLI. Bug 4182245
if ((pKernelBif != NULL)
&&
(!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
@ -545,7 +549,8 @@ kbusStateUnload_GM107
&&
// RM managed P2P or unconfiguring HW P2P for OS suspend/hibernate
(!kbusIsP2pMailboxClientAllocated(pKernelBus) ||
(flags & GPU_STATE_FLAGS_PM_TRANSITION)))
(flags & GPU_STATE_FLAGS_PM_TRANSITION))
&& !gpuIsSelfHosted(pGpu))
{
kbusUnlinkP2P_HAL(pGpu, pKernelBus);
}
@ -3526,9 +3531,12 @@ kbusStateDestroy_GM107
// clean up private info block
//
// Call _kbusDestroyP2P_GM107 only in case of Linked SLI and Unlinked SLI. Bug 4182245
if ((pKernelBif != NULL) && ((!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)) &&
(kbusIsP2pInitialized(pKernelBus))))
(kbusIsP2pInitialized(pKernelBus))) &&
!gpuIsSelfHosted(pGpu))
{
(void)_kbusDestroyP2P_GM107(pGpu, pKernelBus);
}
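A condensed sketch of the guard added in the three hunks above; the helper and the KernelBif type spelling are illustrative, and the per-call-site checks (mailbox client allocation, PM transition flags, P2P-initialized state) are intentionally omitted:
// Hypothetical predicate: the legacy _kbusLinkP2P/_kbusUnlinkP2P/_kbusDestroyP2P
// paths now also require that the GPU is not self-hosted.
static NvBool kbusLegacyP2PPathAllowed(OBJGPU *pGpu, struct KernelBif *pKernelBif)
{
    return (pKernelBif != NULL) &&
           (!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
            !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)) &&
           !gpuIsSelfHosted(pGpu);
}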

View File

@ -76,9 +76,13 @@ confComputeApiCtrlCmdSystemGetCapabilities_IMPL
LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner());
pParams->cpuCapability = NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_NONE;
if ((sysGetStaticConfig(pSys))->bOsSevEnabled)
if ((sysGetStaticConfig(pSys))->bOsCCEnabled)
{
pParams->cpuCapability = NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_AMD_SEV;
if ((sysGetStaticConfig(pSys))->bOsCCTdxEnabled)
{
pParams->cpuCapability = NV_CONF_COMPUTE_SYSTEM_CPU_CAPABILITY_INTEL_TDX;
}
}
pParams->gpusCapability = NV_CONF_COMPUTE_SYSTEM_GPUS_CAPABILITY_NONE;

View File

@ -85,10 +85,13 @@ kfifoEngineInfoXlate_GA100
if (outType == ENGINE_INFO_TYPE_MMU_FAULT_ID)
{
NvU32 grIdx, startSubctxId;
NV_STATUS status;
RM_ENGINE_TYPE rmEngineType;
NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_GV100(pGpu, pKernelFifo, inType, inVal,
ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&rmEngineType));
status = kfifoEngineInfoXlate_GV100(pGpu, pKernelFifo, inType, inVal,
ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&rmEngineType);
if (status != NV_OK)
return status;
// check if rmEngineType corresponding to input is GR
if (RM_ENGINE_TYPE_IS_GR(rmEngineType))

View File

@ -1087,7 +1087,7 @@ memdescAlloc
// use such memory has to be unprotected as protected sysmem is not
// accessible to GPU
//
if ((sysGetStaticConfig(pSys))->bOsSevEnabled)
if ((sysGetStaticConfig(pSys))->bOsCCEnabled)
{
if (!gpuIsCCorApmFeatureEnabled(pGpu) ||
(gpuIsApmFeatureEnabled(pGpu) &&

View File

@ -189,6 +189,8 @@ NV_STATUS _pmaNumaAllocateRange
NvU32 flags = OS_ALLOC_PAGES_NODE_NONE;
*allocatedCount = 0;
NV_ASSERT_OR_RETURN(actualSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
// check if numFreeFrames (64KB) is below a certain % of PMA managed memory (indicated by num2mbPages).
if (_pmaCheckFreeFramesToSkipReclaim(pPma))
{
@ -202,12 +204,10 @@ NV_STATUS _pmaNumaAllocateRange
if (status == NV_OK)
{
NvU32 j;
// j=0 head page is already refcounted at allocation
for (j = 1; j < (actualSize >> PMA_PAGE_SHIFT); j++)
{
osAllocAcquirePage(sysPhysAddr + (j << PMA_PAGE_SHIFT));
}
NvU8 osPageShift = osGetPageShift();
// Skip the first page as it is refcounted at allocation.
osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (actualSize >> osPageShift) - 1);
gpaPhysAddr = sysPhysAddr - pPma->coherentCpuFbBase;
NV_ASSERT(gpaPhysAddr < pPma->coherentCpuFbBase);
@ -330,10 +330,12 @@ static NV_STATUS _pmaNumaAllocatePages
{
NV_STATUS status = NV_ERR_NO_MEMORY;
NvU64 sysPhysAddr;
NvU64 i = 0, j = 0;
NvU64 i = 0;
NvU32 flags = OS_ALLOC_PAGES_NODE_NONE;
NvU8 osPageShift = osGetPageShift();
NV_ASSERT(allocationCount);
NV_ASSERT_OR_RETURN(pageSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
// check if numFreeFrames is below a certain % of PMA managed memory.
if (_pmaCheckFreeFramesToSkipReclaim(pPma))
@ -357,11 +359,8 @@ static NV_STATUS _pmaNumaAllocatePages
NV_ASSERT(sysPhysAddr >= pPma->coherentCpuFbBase);
pPages[i] = sysPhysAddr - pPma->coherentCpuFbBase;
// Skip the head page at offset 0 (j=0) as it is refcounted at allocation
for (j = 1; j < (pageSize >> PMA_PAGE_SHIFT); j++)
{
osAllocAcquirePage(sysPhysAddr + (j << PMA_PAGE_SHIFT));
}
// Skip the first page as it is refcounted at allocation.
osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
}
if (bScrubOnAlloc)
@ -658,6 +657,9 @@ void pmaNumaFreeInternal
)
{
NvU64 i, j;
NvU8 osPageShift = osGetPageShift();
NV_ASSERT_OR_RETURN_VOID(PMA_PAGE_SHIFT >= osPageShift);
NV_PRINTF(LEVEL_INFO, "Freeing pPage[0] = %llx pageCount %lld\n", pPages[0], pageCount);
@ -704,7 +706,7 @@ void pmaNumaFreeInternal
continue;
}
sysPagePhysAddr = sysPhysAddr + (j << PMA_PAGE_SHIFT);
osAllocReleasePage(sysPagePhysAddr);
osAllocReleasePage(sysPagePhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), newStatus, ~ATTRIB_EVICTING);
}
}
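A worked example of the new batched refcounting, assuming 64KB PMA frames (PMA_PAGE_SHIFT == 16) and a 4KB kernel page size (osGetPageShift() == 12); both values and the wrapper function are purely illustrative:
// Illustration of the counts passed to the batched acquire/release calls,
// replacing the old per-PMA-frame loops.
static void pmaNumaRefcountExample(NvU64 sysPhysAddr, NvU64 actualSize)
{
    NvU8 osPageShift = osGetPageShift();   // assumed 12 (4KB) for the numbers below

    // Allocation path: the head OS page is already refcounted, so acquire the
    // remaining pages in one call. For actualSize == 2MB this acquires 511 pages.
    osAllocAcquirePage(sysPhysAddr + (1ULL << osPageShift),
                       (NvU32)((actualSize >> osPageShift) - 1));

    // Free path: release one 64KB PMA frame's worth of OS pages per call,
    // i.e. 1 << (16 - 12) == 16 pages with the assumed shifts.
    osAllocReleasePage(sysPhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
}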

View File

@ -41,7 +41,7 @@ static NvU64 osCountTailPages(NvU64 sysPagePhysAddr)
return 0;
}
static void osAllocReleasePage(NvU64 sysPagePhysAddr)
static void osAllocReleasePage(NvU64 sysPagePhysAddr, NvU32 pageCount)
{
return;
}
@ -51,6 +51,11 @@ static NV_STATUS osOfflinePageAtAddress(NvU64 address)
return NV_ERR_GENERIC;
}
static NvU8 osGetPageShift(void)
{
return 0;
}
NV_STATUS scrubCheck(OBJMEMSCRUB *pScrubber, PSCRUB_NODE *ppList, NvU64 *size)
{
return NV_ERR_GENERIC;
@ -378,6 +383,10 @@ _pmaCleanupNumaReusePages
// Since we set the NUMA_REUSE bit when we decide to reuse the pages,
// we know exactly which pages to free, both to the OS and in the PMA bitmap.
//
NvU8 osPageShift = osGetPageShift();
NV_ASSERT_OR_RETURN(PMA_PAGE_SHIFT >= osPageShift, NV_ERR_INVALID_STATE);
for (i = 0; i < numFrames; i++)
{
currentStatus = pPma->pMapInfo->pmaMapRead(pPma->pRegions[regId], (frameNum + i), NV_TRUE);
@ -385,7 +394,7 @@ _pmaCleanupNumaReusePages
if (currentStatus & ATTRIB_NUMA_REUSE)
{
osAllocReleasePage(sysPagePhysAddr);
osAllocReleasePage(sysPagePhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + i),
STATE_FREE, (STATE_MASK | ATTRIB_NUMA_REUSE));
}

View File

@ -141,6 +141,51 @@ _kmemsysConfigureAtsPeers
return NV_OK;
}
/*!
* @brief Remove local GPU's peer ATS config
*
* @param[in] pLocalGpu Local GPU OBJGPU pointer
* @param[in] pLocalKernelMemorySystem Local GPU KernelMemorySystem pointer
* @param[in] peerId Peer ID used by the local GPU for the remote GPU
*
* @return NV_OK on success
*/
static
NV_STATUS
_kmemsysResetAtsPeerConfiguration
(
OBJGPU *pLocalGpu,
KernelMemorySystem *pLocalKernelMemorySystem,
NvU32 peerId
)
{
RM_API *pLocalRmApi = GPU_GET_PHYSICAL_RMAPI(pLocalGpu);
NV2080_CTRL_INTERNAL_MEMSYS_GET_LOCAL_ATS_CONFIG_PARAMS getParams = { 0 };
NV2080_CTRL_INTERNAL_MEMSYS_SET_PEER_ATS_CONFIG_PARAMS setParams = { 0 };
NV_ASSERT_OK_OR_RETURN(pLocalRmApi->Control(pLocalRmApi,
pLocalGpu->hInternalClient,
pLocalGpu->hInternalSubdevice,
NV2080_CTRL_CMD_INTERNAL_MEMSYS_GET_LOCAL_ATS_CONFIG,
&getParams,
sizeof(NV2080_CTRL_INTERNAL_MEMSYS_GET_LOCAL_ATS_CONFIG_PARAMS)));
setParams.peerId = peerId;
setParams.addrSysPhys = 0;
setParams.addrWidth = getParams.addrWidth;
setParams.mask = 0;
setParams.maskWidth = getParams.maskWidth;
NV_ASSERT_OK_OR_RETURN(pLocalRmApi->Control(pLocalRmApi,
pLocalGpu->hInternalClient,
pLocalGpu->hInternalSubdevice,
NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_PEER_ATS_CONFIG,
&setParams,
sizeof(NV2080_CTRL_INTERNAL_MEMSYS_SET_PEER_ATS_CONFIG_PARAMS)));
return NV_OK;
}
/**
* @brief Setup one pair of ATS peers (non-chiplib configs)
@ -214,6 +259,71 @@ _kmemsysSetupAtsPeers
return NV_OK;
}
/**
* @brief Remove one pair of ATS peers (non-chiplib configs)
*
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelMemorySystem Kernel Memory System pointer
* @param[in] pRemoteGpu OBJGPU pointer for the ATS peer
*
* @return NV_OK on success
*/
static
NV_STATUS
_kmemsysRemoveAtsPeers
(
OBJGPU *pGpu,
KernelMemorySystem *pKernelMemorySystem,
OBJGPU *pRemoteGpu
)
{
NvU32 peer1 = BUS_INVALID_PEER;
NvU32 peer2 = BUS_INVALID_PEER;
NV_STATUS status = NV_OK;
KernelMemorySystem *pLocalKernelMs = NULL;
KernelMemorySystem *pRemoteKernelMs = NULL;
NvU32 attributes = DRF_DEF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK) |
DRF_DEF(_P2PAPI, _ATTRIBUTES, _LINK_TYPE, _SPA);
pLocalKernelMs = pKernelMemorySystem;
pRemoteKernelMs = GPU_GET_KERNEL_MEMORY_SYSTEM(pRemoteGpu);
peer1 = kbusGetPeerId_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pRemoteGpu);
peer2 = kbusGetPeerId_HAL(pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu), pGpu);
status = kbusRemoveP2PMapping_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu),
peer1, peer2, attributes);
if (status != NV_OK)
{
return status;
}
if (pLocalKernelMs && pRemoteKernelMs &&
pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED) &&
pRemoteGpu->getProperty(pRemoteGpu, PDB_PROP_GPU_ATS_SUPPORTED))
{
status = _kmemsysResetAtsPeerConfiguration(pGpu, pLocalKernelMs, peer1);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,
"Removing ATS p2p config between GPU%u and GPU%u "
"failed with status %x\n", pGpu->gpuInstance,
pRemoteGpu->gpuInstance, status);
}
status = _kmemsysResetAtsPeerConfiguration(pRemoteGpu, pRemoteKernelMs, peer2);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,
"Removinging ATS p2p config between GPU%u and GPU%u "
"failed with status %x\n", pRemoteGpu->gpuInstance,
pGpu->gpuInstance, status);
}
}
return NV_OK;
}
/**
* @brief Setup ATS peer access. On GV100 and GH180, ATS peers use NVLINK.
*
@ -258,4 +368,40 @@ kmemsysSetupAllAtsPeers_GV100
return NV_OK;
}
/**
* @brief Remove ATS peer access. On GV100 and GH180, ATS peers use NVLINK.
*
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelMemorySystem Kernel Memory System pointer
*/
void
kmemsysRemoveAllAtsPeers_GV100
(
OBJGPU *pGpu,
KernelMemorySystem *pKernelMemorySystem
)
{
NvU32 gpuAttachCnt, gpuAttachMask, gpuInstance = 0;
NV_STATUS status = NV_OK;
POBJGPU pRemoteGpu = NULL;
// loop over all possible GPU pairs and remove the ATS config
gpumgrGetGpuAttachInfo(&gpuAttachCnt, &gpuAttachMask);
while ((pRemoteGpu = gpumgrGetNextGpu(gpuAttachMask, &gpuInstance)) != NULL)
{
if (pRemoteGpu == pGpu)
continue;
if (gpuIsGpuFullPower(pRemoteGpu) == NV_FALSE)
continue;
status = _kmemsysRemoveAtsPeers(pGpu, pKernelMemorySystem, pRemoteGpu);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failed to remove ATS peer access between GPU%d and GPU%d\n",
pGpu->gpuInstance, pRemoteGpu->gpuInstance);
}
}
}

View File

@ -221,6 +221,22 @@ kmemsysStatePostLoad_IMPL
return NV_OK;
}
NV_STATUS
kmemsysStatePreUnload_IMPL
(
OBJGPU *pGpu,
KernelMemorySystem *pKernelMemorySystem,
NvU32 flags
)
{
if (IS_SILICON(pGpu) &&
pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED))
{
kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem);
}
return NV_OK;
}
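For orientation, the rough call chain of the new ATS teardown path, stitched together from the hunks above; the assumption that kmemsysRemoveAllAtsPeers_HAL resolves through the generated _DISPATCH thunk is inferred from the header change earlier in this commit:
// kmemsysStatePreUnload_IMPL(pGpu, pKMS, flags)          // silicon + ATS-supported only
//   -> kmemsysRemoveAllAtsPeers_HAL(pGpu, pKMS)          // assumed to hit __kmemsysRemoveAllAtsPeers__
//        -> kmemsysRemoveAllAtsPeers_GV100(pGpu, pKMS)   // loop over attached, powered-on GPUs
//             -> _kmemsysRemoveAtsPeers(pGpu, pKMS, pRemoteGpu)
//                  -> kbusRemoveP2PMapping_HAL(...)        // tear down the NVLink P2P mapping
//                  -> _kmemsysResetAtsPeerConfiguration()  // SET_PEER_ATS_CONFIG with addr/mask = 0,
//                                                          // called once per direction (local and remote)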
/*
* Release the state accumulated in StateInit.
* @param[in] pGpu pointer to the GPU instance.

View File

@ -1039,9 +1039,25 @@ knvlinkPrepareForXVEReset_IMPL
// initializations in between, without hanging up the GPU trying to
// flush data over links that aren't available anymore.
//
// Starting with Ampere, single-GPU reset is supported, so remove only the
// NVLink peer mappings of the remote GPUs that are connected to the
// current GPU.
//
if (IsAMPEREorBetter(pGpu))
{
NvU32 remPeerId = kbusGetPeerId_HAL(pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu), pGpu);
if (remPeerId != BUS_INVALID_PEER)
status = knvlinkRemoveMapping_HAL(pRemoteGpu, pRemoteKernelNvlink, NV_FALSE,
NVBIT(remPeerId),
NV_FALSE /* bL2Entry */);
}
else
{
status = knvlinkRemoveMapping_HAL(pRemoteGpu, pRemoteKernelNvlink, NV_FALSE,
((1 << NVLINK_MAX_PEERS_SW) - 1),
NV_FALSE /* bL2Entry */);
}
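A worked example of the two unmap masks above, with remPeerId and NVLINK_MAX_PEERS_SW values assumed purely for illustration:
// Ampere and later (assume remPeerId == 3):
//     NVBIT(3)        == 0x08   // unmap only the peer entry that points back
//                               // at the GPU being reset
// Pre-Ampere (assume NVLINK_MAX_PEERS_SW == 8):
//     (1 << 8) - 1    == 0xFF   // unmap every software peer entry on the remote GPU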
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,

View File

@ -209,7 +209,7 @@ sysmemConstruct_IMPL
memdescSetFlag(pMemDesc, MEMDESC_FLAGS_SYSMEM_OWNED_BY_CLIENT, NV_TRUE);
if ((sysGetStaticConfig(SYS_GET_INSTANCE()))->bOsSevEnabled &&
if ((sysGetStaticConfig(SYS_GET_INSTANCE()))->bOsCCEnabled &&
gpuIsCCorApmFeatureEnabled(pGpu) &&
FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED,
pAllocData->attr2))

View File

@ -1234,6 +1234,19 @@ Mellanox_BlueField_setupFunc
return NV_OK;
}
// Mellanox BlueField3 Setup Function
static NV_STATUS
Mellanox_BlueField3_setupFunc
(
OBJCL *pCl
)
{
// Bug 4151565: BlueField 3 does not support WC mapping
pCl->setProperty(pCl, PDB_PROP_CL_DISABLE_IOMAP_WC, NV_TRUE);
return NV_OK;
}
// Amazon Gravitron2 Setup Function
static NV_STATUS
Amazon_Gravitron2_setupFunc

View File

@ -1011,7 +1011,7 @@ cliresCtrlCmdSystemGetCpuInfo_IMPL
pCpuInfoParams->family = pSys->cpuInfo.family;
pCpuInfoParams->model = pSys->cpuInfo.model;
pCpuInfoParams->stepping = pSys->cpuInfo.stepping;
pCpuInfoParams->bSEVEnabled = (sysGetStaticConfig(pSys))->bOsSevEnabled;
pCpuInfoParams->bSEVEnabled = (sysGetStaticConfig(pSys))->bOsCCEnabled;
portMemCopy(pCpuInfoParams->name,
sizeof (pCpuInfoParams->name), pSys->cpuInfo.name,
sizeof (pCpuInfoParams->name));

View File

@ -4698,13 +4698,15 @@ static NV_STATUS channelAllocate(const gpuTsgHandle tsg,
pAllocInfo->gpFifoAllocParams.gpFifoOffset = channel->gpFifo;
pAllocInfo->gpFifoAllocParams.gpFifoEntries = channel->fifoEntries;
if (params->secure)
pAllocInfo->gpFifoAllocParams.flags = FLD_SET_DRF(OS04, _FLAGS, _CC_SECURE, _TRUE, pAllocInfo->gpFifoAllocParams.flags);
if (isDeviceVoltaPlus(device))
{
if (gpuIsCCorApmFeatureEnabled(pGpu))
{
// All channels are allocated as secure when the Confidential
// Computing feature is enabled.
pAllocInfo->gpFifoAllocParams.flags = FLD_SET_DRF(OS04, _FLAGS, _CC_SECURE, _TRUE,
pAllocInfo->gpFifoAllocParams.flags);
// USERD can be placed in one of the following locations
// 1. Unprotected sysmem in case of both APM and HCC
// 2. Unprotected vidmem in case of APM
@ -6585,7 +6587,6 @@ static void setCeCaps(const NvU8 *rmCeCaps, gpuCeCaps *ceCaps)
ceCaps->nvlinkP2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_NVLINK_P2P);
ceCaps->sysmem = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_SYSMEM);
ceCaps->p2p = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_P2P);
ceCaps->secure = !!NV2080_CTRL_CE_GET_CAP(rmCeCaps, NV2080_CTRL_CE_CAPS_CE_CC_SECURE);
}
static NV_STATUS queryCopyEngines(struct gpuDevice *gpu, gpuCesCaps *cesCaps)

View File

@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.86.10
NVIDIA_VERSION = 535.98
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))