550.54.14

Bernhard Stoeckner 2024-02-23 16:37:56 +01:00
parent 91676d6628
commit 476bd34534
GPG Key ID: 7D23DC2750FAC2E1
186 changed files with 42509 additions and 37629 deletions

View File

@ -2,6 +2,12 @@
## Release 550 Entries
### [550.54.14] 2024-02-23
#### Added
- Added vGPU Host and vGPU Guest support. For vGPU Host, please refer to the README.vgpu packaged in the vGPU Host Package for more details.
### [550.40.07] 2024-01-24
#### Fixed

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.40.07.
version 550.54.14.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.40.07 driver release. This can be achieved by installing
550.54.14 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -188,7 +188,10 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.07/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.54.14/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -650,7 +653,9 @@ Subsystem Device ID.
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400E | 1FF2 10DE 18FF |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
| NVIDIA T400E | 1FF2 17AA 18FF |
| Quadro T1000 | 1FF9 |
| NVIDIA A100-SXM4-40GB | 20B0 |
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
@ -746,12 +751,15 @@ Subsystem Device ID.
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
@ -805,6 +813,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
@ -846,6 +855,7 @@ Subsystem Device ID.
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
@ -854,6 +864,7 @@ Subsystem Device ID.
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
@ -891,6 +902,10 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
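The Kbuild hunk above only bumps the NV_VERSION_STRING define passed on the compiler command line. As a minimal, standalone illustration of how such a `-D` string define can be consumed (the program and its fallback are illustrative, not taken from the tree):

```c
#include <stdio.h>

#ifndef NV_VERSION_STRING
#define NV_VERSION_STRING "unknown"   /* fallback when built without -D */
#endif

int main(void)
{
    /* e.g. build with: cc -DNV_VERSION_STRING='"550.54.14"' version.c */
    printf("built against driver version %s\n", NV_VERSION_STRING);
    return 0;
}
```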

View File

@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iommus.iso_iommu_present)
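The NV_IS_CONSOLE_MAPPED() addition above encodes where the console framebuffer lives: at the BAR1 base when the console was set up by the EFI GOP, or at BAR2 + 16MB when it was set up by the VBIOS. Below is a standalone sketch of that test; the struct is a simplified stand-in for nv_state_t, and the BAR index assignments (FB = BAR1, IMEM = BAR2) follow the comment in the hunk.

```c
#include <stdint.h>
#include <stdio.h>

struct bar_sketch { uint64_t cpu_address; };
struct nv_sketch  { struct bar_sketch bars[3]; };  /* [1] = FB/BAR1, [2] = IMEM/BAR2 */

static int is_console_mapped(const struct nv_sketch *nv, uint64_t addr)
{
    return addr == nv->bars[1].cpu_address ||                 /* EFI GOP console */
           addr == nv->bars[2].cpu_address + 0x1000000ULL;    /* VBIOS console   */
}

int main(void)
{
    struct nv_sketch nv = { .bars = { { 0 }, { 0xA0000000ULL }, { 0xB0000000ULL } } };
    printf("%d %d\n",
           is_console_mapped(&nv, 0xA0000000ULL),   /* 1: BAR1 base   */
           is_console_mapped(&nv, 0xB1000000ULL));  /* 1: BAR2 + 16MB */
    return 0;
}
```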
@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;

View File

@ -956,12 +956,20 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
- This function should not be called when interrupts are disabled.
Arguments:
device[IN] - Device handle associated with the gpu
pFaultInfo[IN] - information provided by RM for fault handling.
used for obtaining the device handle without locks.
bCopyAndFlush[IN] - Instructs RM to perform the flush in the Copy+Flush mode.
In this mode, RM will perform a copy of the packets from
the HW buffer to UVM's SW buffer as part of performing
the flush. This mode gives UVM the opportunity to observe
the packets contained within the HW buffer at the time
of issuing the call.
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
/*******************************************************************************
nvUvmInterfaceTogglePrefetchFaults
@ -982,7 +990,8 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
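The nvUvmInterfaceFlushReplayableFaultBuffer() change above adds a bCopyAndFlush argument: in Copy+Flush mode RM copies the packets out of the HW buffer into UVM's SW buffer as part of the flush, so UVM can still observe them afterwards. A standalone toy model of that semantic (not the RM implementation):

```c
#include <stdio.h>
#include <string.h>

/* Flush a toy "HW" ring: always empty it, and optionally copy its packets
 * into a caller-visible "SW" buffer first (the Copy+Flush mode). */
static size_t flush_buffer(int *hw, size_t *hw_count, int *sw, int copy_and_flush)
{
    size_t copied = 0;
    if (copy_and_flush) {
        memcpy(sw, hw, *hw_count * sizeof(*hw));
        copied = *hw_count;
    }
    *hw_count = 0;
    return copied;
}

int main(void)
{
    int hw[4] = { 1, 2, 3, 4 }, sw[4] = { 0 };
    size_t hw_count = 4;
    size_t seen = flush_buffer(hw, &hw_count, sw, 1 /* copy and flush */);
    printf("observed %zu packets during flush, first = %d\n", seen, sw[0]);
    return 0;
}
```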

View File

@ -700,8 +700,10 @@ typedef struct UvmGpuInfo_tag
// local EGM properties
// NV_TRUE if EGM is enabled
NvBool egmEnabled;
// Peer ID to reach local EGM when EGM is enabled
NvU8 egmPeerId;
// EGM base address to offset in the GMMU PTE entry for EGM mappings
NvU64 egmBaseAddr;
} UvmGpuInfo;
@ -712,9 +714,10 @@ typedef struct UvmGpuFbInfo_tag
// RM regions that are not registered with PMA either.
NvU64 maxAllocatableAddress;
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
} UvmGpuFbInfo;
typedef struct UvmGpuEccInfo_tag
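The UvmGpuFbInfo hunk above realigns the comments and adds maxVidmemPageSize; heapSize and reservedHeapSize remain in KB. A standalone sketch of the fields involved, converting the KB values to bytes (the struct is a simplified stand-in for the header's definition and the values are made up):

```c
#include <stdio.h>
#include <stdint.h>

typedef struct {
    uint32_t heapSize;          /* RAM in KB available for user allocations */
    uint32_t reservedHeapSize;  /* RAM in KB reserved for internal RM use   */
    int      bZeroFb;           /* Zero FB mode enabled                     */
    uint64_t maxVidmemPageSize; /* largest GPU page size used for vidmem    */
} FbInfoSketch;

int main(void)
{
    FbInfoSketch fb = { 8u * 1024 * 1024, 256u * 1024, 0, 2ULL << 20 };
    printf("usable heap: %llu bytes, max vidmem page: %llu bytes\n",
           ((unsigned long long)fb.heapSize - fb.reservedHeapSize) * 1024ULL,
           (unsigned long long)fb.maxVidmemPageSize);
    return 0;
}
```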

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;
/*
* ---------------------------------------------------------------------------

View File

@ -75,7 +75,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);

View File

@ -96,5 +96,6 @@ NV_HEADER_PRESENCE_TESTS = \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h
asm/cpufeature.h \
linux/mpi.h

View File

@ -58,7 +58,7 @@
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 9
#define UVM_API_LATEST_REVISION 11
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@ -297,7 +297,9 @@ NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU for which pageable memory access support is queried.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition for which
// pageable memory access support is queried.
//
// pageableMemAccess: (OUTPUT)
// Returns true (non-zero) if the GPU represented by gpuUuid supports
@ -327,6 +329,12 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
// process results in an error.
//
// After successfully registering a GPU partition, all subsequent API calls
// which take a NvProcessorUuid argument (including UvmGpuMappingAttributes),
// must use the GI partition UUID which can be obtained with
// NvRmControl(NVC637_CTRL_CMD_GET_UUID). Otherwise, if the GPU is not SMC
// capable or SMC enabled, the physical GPU UUID must be used.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the physical GPU to register.
@ -431,7 +439,8 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to unregister.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unregister.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -489,7 +498,8 @@ NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to register.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to register.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hVaSpace.
@ -560,7 +570,9 @@ NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU whose VA space should be unregistered.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition whose VA space
// should be unregistered.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -590,7 +602,7 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// The two GPUs must be connected via PCIe. An error is returned if the GPUs are
// not connected or are connected over an interconnect different than PCIe
// (NVLink, for example).
// (NVLink or SMC partitions, for example).
//
// If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
// must support the same set of page sizes for GPU mappings.
@ -603,10 +615,12 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@ -652,10 +666,12 @@ NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -700,7 +716,9 @@ NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU that the channel is associated with.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition that the channel is
// associated with.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hChannel.
@ -1139,11 +1157,14 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location for this VA range.
// UUID of the CPU, UUID of the physical GPU if the GPU is not SMC
// capable or SMC enabled, or the GPU instance UUID of the partition of
// the preferred location for this VA range.
//
// accessedByUuids: (INPUT)
// UUIDs of all processors that should have persistent mappings to this
// VA range.
// UUID of the CPU, UUID of the physical GPUs if the GPUs are not SMC
// capable or SMC enabled, or the GPU instance UUID of the partitions
// that should have persistent mappings to this VA range.
//
// accessedByCount: (INPUT)
// Number of elements in the accessedByUuids array.
@ -1421,7 +1442,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@ -1499,7 +1522,9 @@ NV_STATUS UvmMigrate(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@ -1576,7 +1601,9 @@ NV_STATUS UvmMigrateAsync(void *base,
// Id of the range group whose associated VA ranges have to be migrated.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// Error codes:
// NV_ERR_OBJECT_NOT_FOUND:
@ -1938,7 +1965,9 @@ NV_STATUS UvmMapExternalAllocation(void *base,
//
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the sparse region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the sparse
// region on.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -1995,7 +2024,9 @@ NV_STATUS UvmMapExternalSparse(void *base,
// The length of the virtual address range.
//
// gpuUuid: (INPUT)
// UUID of the GPU to unmap the VA range from.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unmap the VA
// range from.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2062,7 +2093,9 @@ NV_STATUS UvmUnmapExternalAllocation(void *base,
// supported by the GPU.
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the dynamic parallelism region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the
// dynamic parallelism region on.
//
// Errors:
// NV_ERR_UVM_ADDRESS_IN_USE:
@ -2293,7 +2326,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// preferred location.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
@ -2469,8 +2504,9 @@ NV_STATUS UvmUnsetPreferredLocation(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor that should have pages in the the VA range
// mapped when possible.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// that should have pages in the VA range mapped when possible.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2538,8 +2574,10 @@ NV_STATUS UvmSetAccessedBy(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor from which any policies set by
// UvmSetAccessedBy should be revoked for the given VA range.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// from which any policies set by UvmSetAccessedBy should be revoked
// for the given VA range.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2597,7 +2635,9 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to enable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to enable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@ -2633,7 +2673,9 @@ NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to disable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to disable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -2862,7 +2904,9 @@ NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
// Name of the counter in that scope.
//
// gpu: (INPUT)
// Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
// This parameter is ignored in AllGpu scopes.
//
// pCounterHandle: (OUTPUT)
// Handle to the counter address.
@ -2916,7 +2960,7 @@ NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added in the queue till they are enabled by the user.
// No events are added in the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and queue is not freed.
//
@ -2967,7 +3011,7 @@ NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
// UvmEventQueueDestroy
//
// This call frees all interal resources associated with the queue, including
// upinning of the memory associated with that queue. Freeing user buffer is
// unpinning of the memory associated with that queue. Freeing user buffer is
// responsibility of a caller. Event queue might be also destroyed as a side
// effect of destroying a session associated with this queue.
//
@ -3151,9 +3195,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
// Using indices saves on the size of each event entry. This API provides the
// gpuIndex to gpuUuid relation to the user.
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
@ -3161,9 +3205,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
//
// Arguments:
// gpuUuidTable: (OUTPUT)
// The return value is an array of UUIDs. The array index is the
// corresponding gpuIndex. There can be at max 32 gpus associated with
// UVM, so array size is 32.
// The return value is an array of physical GPU UUIDs. The array index
// is the corresponding gpuIndex. There can be at max 32 GPUs
// associated with UVM, so array size is 32.
//
// validCount: (OUTPUT)
// The system doesn't normally contain 32 GPUs. This field gives the
@ -3222,7 +3266,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry *pBuffer,
UvmEventEntry_V1 *pBuffer,
NvU64 *nEntries);
//------------------------------------------------------------------------------
@ -3418,10 +3462,15 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
//
#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
// sizeof(UvmToolsEventControlData_V2).
NvLength UvmToolsGetEventControlSize(void);
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
// sizeof(UvmEventEntry_V2).
NvLength UvmToolsGetEventEntrySize(void);
#endif
NvLength UvmToolsGetNumberOfCounters(void);
@ -3436,6 +3485,12 @@ NvLength UvmToolsGetNumberOfCounters(void);
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for events or counters.
// See UvmEventEntry_V1 and UvmEventEntry_V2.
// UvmToolsEventControlData_V2::version records the entry version that
// will be generated.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold at least event_buffer_size events. Gets pinned until queue is
@ -3447,10 +3502,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData (although single page-size allocation
// should be more than enough). One could call
// UvmToolsGetEventControlSize() function to find out current size of
// UvmToolsEventControlData. Gets pinned until queue is destroyed.
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
// UvmToolsEventControlData_V2 (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)
// Handle to the created queue.
@ -3460,22 +3514,32 @@ NvLength UvmToolsGetNumberOfCounters(void);
// Session handle does not refer to a valid session
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// One of the parameters: event_buffer, event_buffer_size, event_control
// is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate a queue of requested size. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory). Also it could not
// have been possible to create UvmToolsEventQueueDescriptor.
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate a queue of requested size. Another
// would be either event_buffer or event_control memory couldn't be
// pinned (e.g. because of OS limitation of pinnable memory). Also it
// could not have been possible to create UvmToolsEventQueueDescriptor.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#else
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#endif
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
@ -3512,7 +3576,7 @@ NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
//------------------------------------------------------------------------------
// UvmToolsDestroyEventQueue
//
// Destroys all internal resources associated with the queue. It unpinns the
// Destroys all internal resources associated with the queue. It unpins the
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
// destroyed when corresponding session gets destroyed.
//
@ -3534,7 +3598,7 @@ NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
// UvmEventQueueEnableEvents
//
// This call enables a particular event type in the event queue. All events are
// disabled by default. Any event type is considered listed if and only if it's
// disabled by default. Any event type is considered listed if and only if its
// corresponding value is equal to 1 (in other words, bit is set). Disabled
// events listed in eventTypeFlags are going to be enabled. Enabled events and
// events not listed in eventTypeFlags are not affected by this call.
@ -3567,7 +3631,7 @@ NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
// UvmToolsEventQueueDisableEvents
//
// This call disables a particular event type in the event queue. Any event type
// is considered listed if and only if it's corresponding value is equal to 1
// is considered listed if and only if its corresponding value is equal to 1
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
// going to be disabled. Disabled events and events not listed in eventTypeFlags
// are not affected by this call.
@ -3605,7 +3669,7 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@ -3628,9 +3692,9 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
// Provided session is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
//------------------------------------------------------------------------------
@ -3641,12 +3705,12 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
//------------------------------------------------------------------------------
// UvmToolsCreateProcessorCounters
//
// Creates the counters structure for tracking per-process counters.
// Creates the counters structure for tracking per-processor counters.
// These counters are disabled by default.
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@ -3662,7 +3726,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// counters are destroyed.
//
// processorUuid: (INPUT)
// UUID of the resource, for which counters will provide statistic data.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID of
// the resource, for which counters will provide statistic data.
//
// counters: (OUTPUT)
// Handle to the created counters.
@ -3672,9 +3738,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// session handle does not refer to a valid tools session
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
// NV_ERR_INVALID_ARGUMENT
@ -3690,7 +3756,7 @@ NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
// UvmToolsDestroyCounters
//
// Destroys all internal resources associated with this counters structure.
// It unpinns the buffer provided in UvmToolsCreate*Counters. Counters structure
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
// also gest destroyed when corresponding session is destroyed.
//
// Arguments:
@ -3711,7 +3777,7 @@ NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
// UvmToolsEnableCounters
//
// This call enables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Disabled counter types listed in
// counterTypeFlags are going to be enabled. Already enabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@ -3745,7 +3811,7 @@ NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
// UvmToolsDisableCounters
//
// This call disables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Enabled counter types listed in
// counterTypeFlags are going to be disabled. Already disabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@ -3890,32 +3956,72 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
// UvmToolsGetProcessorUuidTable
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains registered,
// its index in the table does not change. New registrations obtain the first
// unused index.
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
// registered, its index in the table does not change.
// Note that the index in the table corresponds to the processor ID reported
// in UvmEventEntry event records and that the table is not contiguously packed
// with non-zero UUIDs even with no GPU unregistrations.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for the UUID table returned. The version must
// match the requested version of the event queue created with
// UvmToolsCreateEventQueue().
// See UvmEventEntry_V1 and UvmEventEntry_V2.
//
// table: (OUTPUT)
// Array of processor UUIDs, including the CPU's UUID which is always
// at index zero. The srcIndex and dstIndex fields of the
// UvmEventMigrationInfo struct index this array. Unused indices will
// have a UUID of zero.
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
// for the UUID of the physical GPU and only supports a single SMC
// partition registered per process. Version UvmEventEntry_V2 supports
// multiple SMC partitions registered per process and uses physical GPU
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
// UUIDs for SMC partitions.
// The table pointer can be NULL in which case, the size of the table
// needed to hold all the UUIDs is returned in 'count'.
//
// table_size: (INPUT)
// The size of the table in number of array elements. This can be
// zero if the table pointer is NULL.
//
// count: (OUTPUT)
// Set by UVM to the number of UUIDs written, including any gaps in
// the table due to unregistered GPUs.
// On output, it is set by UVM to the number of UUIDs needed to hold
// all the UUIDs, including any gaps in the table due to unregistered
// GPUs.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
// writing to table failed.
// writing to table failed or the count pointer was invalid.
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// The count pointer is NULL.
// See UvmToolsEventQueueVersion.
//
// NV_WARN_MISMATCHED_TARGET:
// The kernel returned a table suitable for UvmEventEntry_V1 events.
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
NvProcessorUuid *table,
NvLength *count);
#else
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
NvProcessorUuid *table,
NvLength table_size,
NvLength *count);
#endif
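The updated UvmToolsGetProcessorUuidTable() documentation above allows a NULL table pointer so callers can first query the required element count, then allocate and call again. A standalone sketch of that two-pass pattern; query_uuid_table() below is a stand-in that mimics the documented behaviour, not the real API:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { unsigned char uuid[16]; } UuidSketch;

/* Stand-in for the documented behaviour: always report the needed count,
 * and only fill the table when a large-enough buffer was supplied. */
static int query_uuid_table(UuidSketch *table, size_t table_size, size_t *count)
{
    const size_t registered = 3;          /* CPU at index 0 plus two GPUs */
    if (count == NULL)
        return -1;
    *count = registered;
    if (table != NULL && table_size >= registered)
        memset(table, 0xAB, registered * sizeof(*table));
    return 0;
}

int main(void)
{
    size_t needed = 0;
    query_uuid_table(NULL, 0, &needed);            /* first pass: size only   */
    UuidSketch *table = calloc(needed, sizeof(*table));
    if (table == NULL)
        return 1;
    query_uuid_table(table, needed, &needed);      /* second pass: fill table */
    printf("UUID table holds %zu entries\n", needed);
    free(table);
    return 0;
}
```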
//------------------------------------------------------------------------------
// UvmToolsFlushEvents

View File

@ -34,16 +34,6 @@
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is

View File

@ -30,7 +30,7 @@
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
#endif
@ -246,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
return uvm_ats_region_from_start_end(start, end);
}
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
@ -284,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
@ -370,6 +370,8 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
mmu_interval_notifier_remove(range.notifier);
#else
uvm_page_mask_zero(residency_mask);
#endif
return status;
@ -403,21 +405,24 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
// Residency mask needs to be computed even if prefetching is disabled since
// the residency information is also needed by access counters servicing in
// uvm_ats_service_access_counters()
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(accessed_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
@ -637,8 +642,18 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
// is just an optimization and servicing access counter migrations is still
// worthwhile even without any prefetching added. So, let servicing continue
// instead of returning early even if the prefetch computation fails.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
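The ATS servicing change above drops pages that are already resident at the intended destination from the accessed mask before the subregion loop (accessed &= ~resident). A standalone sketch of that filtering step, using a 64-bit word as a stand-in for uvm_page_mask_t:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t accessed = 0x00FFULL;   /* pages reported by faults/access counters   */
    uint64_t resident = 0x000FULL;   /* pages already resident at the destination  */

    uint64_t to_service = accessed & ~resident;   /* the andnot step from the diff */

    printf("pages left to service: 0x%llx\n", (unsigned long long)to_service);
    return 0;
}
```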

View File

@ -318,10 +318,11 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
memcpy(buffer, "UVM-GPU-", 8);
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
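The reordering above makes format_uuid_to_buffer() validate bufferLength before writing the "UVM-GPU-" prefix, so an undersized buffer is no longer written past its first byte. A standalone, simplified sketch of the corrected ordering (only the prefix handling is modelled; the hex-digit loop is elided):

```c
#include <stdio.h>
#include <string.h>

/* Simplified stand-in: check the destination size first, then write the prefix. */
static int format_uuid_prefix(char *buffer, unsigned bufferLength)
{
    /* 8-char prefix + 32 hex digits + 4 dashes + terminating NUL */
    if (bufferLength < (8 + 16 * 2 + 4 * 1 + 1))
        return *buffer = 0;

    memcpy(buffer, "UVM-GPU-", 8);
    buffer[8] = '\0';            /* digits and dashes would follow here */
    return 8;
}

int main(void)
{
    char small[4], big[64];
    printf("%d %d\n", format_uuid_prefix(small, sizeof(small)),
                      format_uuid_prefix(big, sizeof(big)));
    return 0;
}
```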

View File

@ -151,22 +151,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
return NV_OK;
}
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *gpu;
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
mem_info->uuid = gpu->parent->uuid;
break;
}
}
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
@ -197,11 +181,6 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
@ -309,11 +288,6 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -314,7 +314,7 @@ static NV_STATUS uvm_suspend(void)
// interrupts in the bottom half in the future, the bottom half flush
// below will no longer be able to guarantee that all outstanding
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
@ -373,13 +373,13 @@ static NV_STATUS uvm_resume(void)
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
uvm_gpu_access_counters_set_ignore(gpu, false);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
}
uvm_up_write(&g_uvm_global.pm.lock);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -59,6 +59,7 @@ MODULE_PARM_DESC(uvm_peer_copy, "Choose the addressing mode for peer copying, op
static void remove_gpu(uvm_gpu_t *gpu);
static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu);
static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu);
static void destroy_nvlink_peers(uvm_gpu_t *gpu);
@ -241,6 +242,8 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
return NV_OK;
}
@ -843,11 +846,11 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (!uvm_procfs_is_enabled())
return NV_OK;
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), uvm_gpu_uuid(gpu));
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->parent->uuid);
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
// Create UVM-GPU-${physical-UUID}/${sub_processor_index} directory
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
@ -855,7 +858,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
return NV_ERR_OPERATING_SYSTEM;
// Create symlink from ${gpu_id} to
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
// UVM-GPU-${physical-UUID}/${sub_processor_index}
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
snprintf(gpu_dir_name,
sizeof(gpu_dir_name),
@ -867,6 +870,16 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (gpu->procfs.dir_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
if (gpu->parent->smc.enabled) {
// Create symlink from UVM-GPU-${GI-UUID} to
// UVM-GPU-${physical-UUID}/${sub_processor_index}
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->uuid);
gpu->procfs.gpu_instance_uuid_symlink = proc_symlink(uuid_text_buffer, gpu_base_dir_entry, gpu_dir_name);
if (gpu->procfs.gpu_instance_uuid_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
}
// GPU peer files are debug only
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
@ -882,6 +895,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
{
proc_remove(gpu->procfs.dir_peers);
proc_remove(gpu->procfs.gpu_instance_uuid_symlink);
proc_remove(gpu->procfs.dir_symlink);
proc_remove(gpu->procfs.dir);
}
@ -1038,6 +1052,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
NvU32 num_entries;
NvU64 va_size;
NvU64 va_per_entry;
uvm_mmu_page_table_alloc_t *tree_alloc;
status = uvm_page_tree_init(gpu,
NULL,
@ -1059,20 +1074,30 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
// Make sure that RM's part of the VA is aligned to the VA covered by a
// single top level PDE.
UVM_ASSERT_MSG(gpu->parent->rm_va_base % va_per_entry == 0,
"va_base 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_base, va_per_entry);
"va_base 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_base,
va_per_entry);
UVM_ASSERT_MSG(gpu->parent->rm_va_size % va_per_entry == 0,
"va_size 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_size, va_per_entry);
"va_size 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_size,
va_per_entry);
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
tree_alloc->addr.address,
num_entries,
tree_alloc->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
gpu->rm_address_space_moved_to_page_tree = true;
return NV_OK;
@ -1212,6 +1237,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
size_t len;
NV_STATUS status;
if (gpu->parent->smc.enabled) {
@ -1229,6 +1256,20 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
gpu->smc.swizz_id = gpu_info->smcSwizzId;
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->parent->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer + 4);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->uuid);
len = strlen(gpu->name);
snprintf(gpu->name + len,
sizeof(gpu->name) - len,
" UVM-GI-%s",
uuid_buffer + 8);
// Initialize the per-GPU procfs dirs as early as possible so that other
// parts of the driver can add files in them as part of their per-GPU init.
status = init_procfs_dirs(gpu);
@ -1338,7 +1379,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_parent_gpu_t *parent_gpu,
uvm_gpu_t **gpu_out)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
NV_STATUS status;
bool alloc_parent = (parent_gpu == NULL);
uvm_gpu_t *gpu = NULL;
@ -1364,13 +1404,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
if (alloc_parent)
fill_parent_gpu_info(parent_gpu, gpu_info);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer);
// After this point all error clean up should be handled by remove_gpu()
if (!gpu_supports_uvm(parent_gpu)) {
@ -1432,13 +1465,25 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
if (alloc_parent) {
if (gpu->parent->smc.enabled) {
status = discover_smc_peers(gpu);
if (status != NV_OK) {
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
}
}
else if (alloc_parent) {
status = discover_nvlink_peers(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
// Nobody can have retained the GPU yet, since we still hold the global
// lock.
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
@ -1686,7 +1731,7 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_kvfree(parent_gpu);
@ -1915,32 +1960,25 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
}
static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_gpu_id_t gpu_id;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id(gpu_id) {
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
if (gpu) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
UVM_ASSERT(!gpu->parent->smc.enabled);
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
}
}
return NULL;
}
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
return gpu_get_by_uuid_locked(gpu_uuid);
}
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
static uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
{
uvm_gpu_t *gpu;
@ -1998,7 +2036,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
// if this specific partition has been seen previously. The UUID-based
// look-up above may have succeeded for a different partition with the
// same parent GPU.
if (gpu_info->smcEnabled) {
@ -2287,7 +2325,7 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_ERR_OPERATING_SYSTEM;
// Create a symlink from UVM GPU UUID (UVM-GPU-...) to the UVM GPU ID gpuB
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), uvm_gpu_uuid(remote));
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), &remote->uuid);
peer_caps->procfs.peer_symlink_file[local_idx] = proc_symlink(symlink_name,
local->procfs.dir_peers,
gpu_dir_name);
@ -2297,6 +2335,24 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_OK;
}
static NV_STATUS init_procfs_peer_files(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status;
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
return NV_OK;
}
static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_gpu_t *gpu1,
const UvmGpuP2PCapsParams *p2p_caps_params,
@ -2377,16 +2433,41 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
return init_procfs_peer_files(gpu0, gpu1);
}
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu)
{
NvU32 sub_processor_index;
uvm_gpu_t *other_gpu;
NV_STATUS status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(gpu->parent->smc.enabled);
for_each_sub_processor_index(sub_processor_index) {
uvm_gpu_peer_t *peer_caps;
other_gpu = gpu->parent->gpus[sub_processor_index];
if (!other_gpu || other_gpu == gpu)
continue;
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->ref_count == 1)
continue;
UVM_ASSERT(peer_caps->ref_count == 0);
memset(peer_caps, 0, sizeof(*peer_caps));
peer_caps->ref_count = 1;
status = init_procfs_peer_files(gpu, other_gpu);
if (status != NV_OK) {
peer_caps->ref_count = 0;
return status;
}
}
return NV_OK;
}
@ -2489,9 +2570,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (gpu->parent->smc.enabled)
return NV_OK;
UVM_ASSERT(!gpu->parent->smc.enabled);
for_each_gpu(other_gpu) {
UvmGpuP2PCapsParams p2p_caps_params;
@ -2592,10 +2671,6 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
UVM_ASSERT(gpu0);
UVM_ASSERT(gpu1);
// P2P is not supported under SMC partitioning
UVM_ASSERT(!gpu0->parent->smc.enabled);
UVM_ASSERT(!gpu1->parent->smc.enabled);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
@ -2638,9 +2713,9 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// IDs queried from the peer table above which are about to be removed from
// the global table.
if (gpu0->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu0);
uvm_parent_gpu_access_counter_buffer_flush(gpu0->parent);
if (gpu1->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu1);
uvm_parent_gpu_access_counter_buffer_flush(gpu1->parent);
memset(peer_caps, 0, sizeof(*peer_caps));
}
@ -2668,12 +2743,17 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu)
{
size_t peer_index;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
// Indirect peers are accessed as sysmem addresses
if (peer_caps->is_indirect_peer)
return UVM_APERTURE_SYS;
// MIG instances in the same physical GPU have vidmem addresses
if (local_gpu->parent == remote_gpu->parent)
return UVM_APERTURE_VID;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id))
peer_index = 0;
else
@ -3285,12 +3365,19 @@ NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *fil
.user_client = params->hClient,
.user_object = params->hSmcPartRef,
};
NvProcessorUuid gpu_instance_uuid;
NV_STATUS status;
return uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId);
status = uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId,
&gpu_instance_uuid);
if (status == NV_OK)
uvm_uuid_copy(&params->gpu_uuid, &gpu_instance_uuid);
return status;
}
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp)
@ -3363,10 +3450,10 @@ NV_STATUS uvm_test_set_prefetch_filtering(UVM_TEST_SET_PREFETCH_FILTERING_PARAMS
switch (params->filtering_mode) {
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_ALL:
uvm_gpu_disable_prefetch_faults(gpu->parent);
uvm_parent_gpu_disable_prefetch_faults(gpu->parent);
break;
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_NONE:
uvm_gpu_enable_prefetch_faults(gpu->parent);
uvm_parent_gpu_enable_prefetch_faults(gpu->parent);
break;
default:
status = NV_ERR_INVALID_ARGUMENT;

View File

@ -618,9 +618,10 @@ struct uvm_gpu_struct
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// Nice printable name in the format:
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
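The doubled buffer above must now hold both the physical GPU UUID and the GPU instance UUID. As a quick illustration of why 9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH is exactly enough, here is a small stand-alone sketch; UUID_TEXT_LEN and the two UUID strings are placeholders for illustration, not values taken from the driver.

```c
/* Stand-alone sketch of the sizing above; UUID_TEXT_LEN and the UUID
 * strings are illustrative stand-ins, not values from this header. */
#include <stdio.h>

#define UUID_TEXT_LEN 64   /* stand-in for UVM_GPU_UUID_TEXT_BUFFER_LENGTH,
                              which already counts a trailing NUL */

int main(void)
{
    /* "ID: 999: " is 9 characters. Each printed UUID needs at most
     * UUID_TEXT_LEN - 1 characters; one of the two counted NULs pays for
     * the separating space and the other terminates the string, so
     * 9 + 2 * UUID_TEXT_LEN is exactly enough. */
    char name[9 + 2 * UUID_TEXT_LEN];
    const char *parent_uuid = "GPU-00000000-1111-2222-3333-444444444444";
    const char *gi_uuid     = "UVM-GI-55555555-6666-7777-8888-999999999999";

    snprintf(name, sizeof(name), "ID: %u: %s %s", 999u, parent_uuid, gi_uuid);
    printf("%s\n", name);
    return 0;
}
```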
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
@ -656,6 +657,10 @@ struct uvm_gpu_struct
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;
// Max supported vidmem page size may be smaller than the max GMMU page
// size because of the page sizes supported by the vMMU.
NvU64 max_vidmem_page_size;
struct
{
// True if the platform supports HW coherence and the GPU's memory
@ -844,6 +849,9 @@ struct uvm_gpu_struct
struct proc_dir_entry *dir_symlink;
// The GPU instance UUID symlink if SMC is enabled.
struct proc_dir_entry *gpu_instance_uuid_symlink;
struct proc_dir_entry *info_file;
struct proc_dir_entry *dir_peers;
@ -1210,11 +1218,6 @@ static const char *uvm_gpu_name(uvm_gpu_t *gpu)
return gpu->name;
}
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
{
return &gpu->parent->uuid;
}
static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
@ -1234,6 +1237,9 @@ struct uvm_gpu_peer_struct
// - The global lock is held.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be SMC peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be NVLINK peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
@ -1319,17 +1325,17 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
// a circular dep between global and gpu modules.
// Get a uvm_gpu_t by UUID. This returns NULL if the GPU is not present. This
// is the general purpose call that should be used normally.
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
// instead.
// Get a uvm_gpu_t by UUID (physical GPU UUID if SMC is not enabled, otherwise
// GPU instance UUID).
// This returns NULL if the GPU is not present.
// This is the general purpose call that should be used normally.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Get a uvm_parent_gpu_t by UUID. Like uvm_gpu_get_by_uuid(), this function
// returns NULL if the GPU has not been registered.
// Get a uvm_parent_gpu_t by UUID (physical GPU UUID).
// Like uvm_gpu_get_by_uuid(), this function returns NULL if the GPU has not
// been registered.
//
// LOCKING: requires the global lock to be held
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
@ -1340,13 +1346,6 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
// the partition hasn't been registered. This call needs to be used instead of
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);
// Retain a gpu by uuid
// Returns the retained uvm_gpu_t in gpu_out on success
//

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,7 +33,7 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
@ -99,7 +99,8 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
"Valid values: [1, 65535]");
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode);
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode);
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
@ -126,7 +127,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all systems except P9.
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@ -149,7 +150,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
if (UVM_ATS_IBM_SUPPORTED())
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;
return false;
@ -281,7 +282,7 @@ get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm
&(access_counters)->current_config.momc;
}
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
@ -340,7 +341,7 @@ static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *confi
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
@ -444,12 +445,12 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
return status;
}
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
@ -475,7 +476,7 @@ void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
batch_context->phys.translations = NULL;
}
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
{
if (!parent_gpu->access_counters_supported)
return false;
@ -518,7 +519,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
// taken control of the notify buffer since the GPU was initialized. Then
// flush old notifications. This will update the cached_put pointer.
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counters->current_config.threshold = config->threshold;
@ -537,20 +538,20 @@ error:
// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_gpu_t *gpu)
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
&access_counters->rm_info));
UVM_ASSERT(status == NV_OK);
}
@ -579,14 +580,14 @@ static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConf
// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void gpu_access_counters_disable(uvm_gpu_t *gpu)
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
if (--gpu->parent->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(gpu);
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu);
}
// Invoked during registration of the GPU in the VA space
@ -598,7 +599,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_ERR_INVALID_DEVICE;
}
else {
@ -616,7 +617,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
// modified to protect from concurrent enablement of access counters in
// another GPU
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
// If this is the first reference taken on access counters, dropping the
@ -626,22 +627,24 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
return status;
}
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *va_space)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
gpu_access_counters_disable(gpu);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
parent_gpu->id)) {
parent_gpu_access_counters_disable(parent_gpu);
// If this VA space reconfigured access counters, clear the
// ownership to allow other processes to invoke the reconfiguration
if (gpu->parent->access_counter_buffer_info.reconfiguration_owner == va_space)
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
@ -660,15 +663,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode)
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@ -680,28 +684,28 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}
write_get(gpu->parent, get);
write_get(parent_gpu, get);
}
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
// Disables access counter interrupts and notification servicing
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (gpu->parent->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
@ -1027,7 +1031,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!iter.migratable)
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, address, processor);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// If the page is throttling, ignore the access counter
// notification
@ -1212,7 +1216,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
service_context->block_context->mm = mm;
uvm_va_block_context_init(service_context->block_context, mm);
if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
@ -1221,7 +1226,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
@ -1506,8 +1512,6 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
uvm_va_block_context_init(service_context->block_context, mm);
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
@ -1519,6 +1523,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
{
@ -1546,7 +1551,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
page_index = uvm_va_block_cpu_page_index(va_block, addr);
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);
// resident_id might be invalid or might already be the same as the GPU
// which received the notification if the memory was already migrated before
@ -1602,6 +1607,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
UVM_ASSERT(va_block);
UVM_ASSERT(index < batch_context->virt.num_notifications);
@ -1610,16 +1616,24 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_page_mask_zero(accessed_pages);
uvm_va_block_context_init(service_context->block_context, mm);
uvm_mutex_lock(&va_block->lock);
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
else
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
expand_notification_block(gpu_va_space,
va_block,
batch_context->block_service_context.block_context,
accessed_pages,
current_entry);
}
else {
break;
}
}
*out_index = i;
@ -1698,6 +1712,9 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
@ -1985,7 +2002,7 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
if (!gpu)
return NV_ERR_INVALID_DEVICE;
params->enabled = uvm_gpu_access_counters_required(gpu->parent);
params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);
uvm_gpu_release(gpu);
@ -2050,11 +2067,11 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_isr_unlock;
}
if (!uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, &config);
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
else
goto exit_isr_unlock;
}
@ -2066,7 +2083,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
// enabled in at least gpu. This inconsistent state is not visible to other
// threads or VA spaces because of the ISR lock, and it is immediately
// rectified by retaking ownership.
access_counters_yield_ownership(gpu);
access_counters_yield_ownership(gpu->parent);
status = access_counters_take_ownership(gpu, &config);
// Retaking ownership failed, so RM owns the interrupt.
@ -2080,8 +2097,8 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}
uvm_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id);
gpu_access_counters_disable(gpu);
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
parent_gpu_access_counters_disable(gpu->parent);
goto exit_isr_unlock;
}
@ -2167,42 +2184,42 @@ exit_release_gpu:
return status;
}
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
bool change_intr_state = false;
if (!gpu->parent->access_counters_supported)
if (!parent_gpu->access_counters_supported)
return;
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (do_ignore) {
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
change_intr_state = true;
}
else {
UVM_ASSERT(gpu->parent->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--gpu->parent->access_counter_buffer_info.notifications_ignored_count == 0)
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
change_intr_state = true;
}
if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications. We
// just disable the interrupt.
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
if (do_ignore)
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
else
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
if (!do_ignore)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
@ -2216,7 +2233,7 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
return NV_ERR_INVALID_DEVICE;
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_set_ignore(gpu, params->ignore);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
else
status = NV_ERR_NOT_SUPPORTED;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,13 +27,13 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
@ -46,7 +46,7 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
@ -63,7 +63,7 @@ void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// Functions used to enable/disable access counters on a GPU in the given VA
// space.
@ -72,12 +72,12 @@ bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
// uvm_parent_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -100,7 +100,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
if (!uvm_parent_gpu_replayable_faults_pending(parent_gpu)) {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
return 0;
}
@ -137,7 +137,7 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
// interrupts will be triggered by the gpu and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
if (!uvm_parent_gpu_non_replayable_faults_pending(parent_gpu))
return 0;
nv_kref_get(&parent_gpu->gpu_kref);
@ -167,7 +167,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
@ -295,7 +295,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
uvm_va_block_context_t *block_context;
if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
@ -361,7 +361,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
@ -423,7 +423,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
// nv_kthread_q_init() failed in uvm_parent_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
@ -438,8 +438,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_disable_intr above could only have raced
// with bottom halves.
// uvm_parent_gpu_replayable_faults_disable_intr above could only have
// raced with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
@ -455,13 +455,13 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
uvm_parent_gpu_name(parent_gpu),
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -116,8 +116,8 @@
// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@ -145,7 +145,7 @@ NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *pare
return NV_OK;
}
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@ -163,7 +163,7 @@ void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_
non_replayable_faults->fault_cache = NULL;
}
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -26,12 +26,12 @@
#include <nvstatus.h>
#include "uvm_forward_decl.h"
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -44,6 +44,24 @@
// provides some background for understanding replayable faults, non-replayable
// faults, and how UVM services each fault type.
// The HW fault buffer flush mode instructs RM on how to flush the hardware
// replayable fault buffer; it is only used in Confidential Computing.
//
// Unless HW_FAULT_BUFFER_FLUSH_MODE_MOVE is functionally required (because UVM
// needs to inspect the faults currently present in the HW fault buffer) it is
// recommended to use HW_FAULT_BUFFER_FLUSH_MODE_DISCARD for performance
// reasons.
typedef enum
{
// Flush the HW fault buffer, discarding all the resulting faults. UVM never
// gets to see these faults.
HW_FAULT_BUFFER_FLUSH_MODE_DISCARD,
// Flush the HW fault buffer, and move all the resulting faults to the SW
// fault ("shadow") buffer.
HW_FAULT_BUFFER_FLUSH_MODE_MOVE,
} hw_fault_buffer_flush_mode_t;
#define UVM_PERF_REENABLE_PREFETCH_FAULTS_LAPSE_MSEC_DEFAULT 1000
// Lapse of time in milliseconds after which prefetch faults can be re-enabled.
@ -226,7 +244,7 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
batch_context->utlbs = NULL;
}
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@ -253,7 +271,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
goto fail;
if (parent_gpu->non_replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
if (status != NV_OK)
goto fail;
}
@ -261,28 +279,28 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
return status;
}
// Reinitialize state relevant to replayable fault handling after returning
// from a power management cycle.
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->replayable_faults_supported);
fault_buffer_reinit_replayable_faults(parent_gpu);
}
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (parent_gpu->non_replayable_faults_supported)
uvm_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
fault_buffer_deinit_replayable_faults(parent_gpu);
@ -297,7 +315,7 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
}
}
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
@ -533,25 +551,26 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);
}
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
// In Confidential Computing GSP-RM owns the HW replayable fault buffer.
// Flushing the fault buffer implies flushing both the HW buffer (using a RM
// API), and the SW buffer accessible by UVM ("shadow" buffer).
//
// The HW buffer needs to be flushed first. This is because, once that flush
// completes, any faults that were present in the HW buffer have been moved to
// the shadow buffer, or have been discarded by RM.
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_fault_buffer_flush_mode_t flush_mode)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
NvBool is_flush_mode_move;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT((flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE) || (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_DISCARD));
// When Confidential Computing is enabled, GSP-RM owns the HW replayable
// fault buffer. Flushing the fault buffer implies flushing both the HW
// buffer (using a RM API), and the SW buffer accessible by UVM ("shadow"
// buffer).
//
// The HW buffer needs to be flushed first. This is because, once that
// flush completes, any faults that were present in the HW buffer when
// fault_buffer_flush_locked is called, are now either flushed from the HW
// buffer, or are present in the shadow buffer and are about to be discarded
// too.
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
// Flush the HW replayable buffer owned by GSP-RM.
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@ -595,10 +614,9 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
// Read PUT pointer from the GPU if requested
if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT || flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT) {
status = hw_fault_buffer_flush_locked(parent_gpu);
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
if (status != NV_OK)
return status;
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
}
@ -1435,7 +1453,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_access_type_to_prot(service_access_type)))
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, current_entry->fault_address, gpu->id);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block,
block_context->block_context,
current_entry->fault_address,
gpu->id);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// Throttling is implemented by sleeping in the fault handler on
// the CPU and by continuing to process faults on other pages on
@ -1981,7 +2002,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
status = hw_fault_buffer_flush_locked(gpu->parent);
status = hw_fault_buffer_flush_locked(gpu->parent, HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
if (status != NV_OK)
goto done;
@ -2738,14 +2759,14 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu, uvm_fau
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_gpu_disable_prefetch_faults(parent_gpu);
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
uvm_gpu_enable_prefetch_faults(parent_gpu);
uvm_parent_gpu_enable_prefetch_faults(parent_gpu);
}
}
@ -2872,7 +2893,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
UVM_DBG_PRINT("Error servicing replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2883,7 +2904,7 @@ void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
}
}
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2940,7 +2961,7 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
do {
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
pending = uvm_parent_gpu_replayable_faults_pending(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (!pending)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -52,12 +52,12 @@ typedef enum
const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
@ -68,8 +68,8 @@ bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);
// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half

View File

@ -1306,7 +1306,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_va_policy_node_t *node;
uvm_va_block_region_t region;
uvm_processor_mask_t map_processors;
uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction;
uvm_processor_id_t id;
NV_STATUS tracker_status;
NV_STATUS status = NV_OK;
@ -1333,9 +1333,9 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
// Exclude the processors that have already been mapped due to
// AccessedBy.
uvm_processor_mask_andnot(&map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
for_each_gpu_id_in_mask(id, &map_processors) {
for_each_gpu_id_in_mask(id, map_processors) {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
uvm_va_block_gpu_state_t *gpu_state;
@ -1866,7 +1866,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
unsigned long *dst_pfns,
uvm_page_mask_t *same_devmem_page_mask)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index);
uvm_va_block_region_t chunk_region;
struct page *dst_page;
@ -2708,7 +2708,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// Since there is a CPU resident page, there shouldn't be one
// anywhere else. TODO: Bug 3660922: Need to handle read
// duplication at some point.
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
// migrate_vma_setup() was able to isolate and lock the page;
// therefore, it is CPU resident and not mapped.
@ -2725,8 +2727,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// used for GPU to GPU copies. It can't be an evicted page because
// migrate_vma_setup() would have found a source page.
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
}
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2019 NVidia Corporation
Copyright (c) 2013-2023 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -320,7 +320,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // IN
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
NvU32 numGpus; // IN
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT
@ -344,9 +344,9 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
} UVM_GET_GPU_UUID_TABLE_PARAMS;
#if defined(WIN32) || defined(WIN64)
@ -494,7 +494,7 @@ typedef struct
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 offset NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NvS32 rmCtrlFd; // IN
NvU32 hClient; // IN
@ -552,7 +552,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpu_uuid; // IN
NvProcessorUuid gpu_uuid; // IN/OUT
NvBool numaEnabled; // OUT
NvS32 numaNodeId; // OUT
NvS32 rmCtrlFd; // IN
@ -835,7 +835,14 @@ typedef struct
//
// Initialize any tracker object such as a queue or counter
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters, UvmToolsCreateProcessorCounters
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the order of structure elements has the version as the last field.
// This is used to tell whether the kernel supports V2 events or not because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
typedef struct
@ -847,6 +854,8 @@ typedef struct
NvU32 allProcessors; // IN
NvU32 uvmFd; // IN
NV_STATUS rmStatus; // OUT
NvU32 requestedVersion; // IN
NvU32 grantedVersion; // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
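To make the version-detection trick described above concrete, here is a hedged user-space sketch of the handshake. The device path, the header name, the zeroed queue fields, and the literal version numbers are assumptions for illustration, not taken from this header.

```c
/* Hedged user-space sketch of the handshake described above. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

#include "uvm_ioctl.h"  /* assumed name of the header diffed here */

int main(void)
{
    UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS params;
    int fd = open("/dev/nvidia-uvm-tools", O_RDWR);  /* assumed device node */

    if (fd < 0)
        return 1;

    /* A real caller must also fill in the queue buffer fields (omitted). */
    memset(&params, 0, sizeof(params));
    params.requestedVersion = 2;  /* ask for V2 events */

    if (ioctl(fd, UVM_TOOLS_INIT_EVENT_TRACKER, &params) == 0) {
        /* A V1 kernel never reads or writes the trailing fields, so a
         * grantedVersion still equal to 0 means only V1 events exist. */
        if (params.grantedVersion == 0)
            printf("kernel predates versioned event trackers (V1 only)\n");
        else
            printf("kernel granted event version %u\n",
                   (unsigned)params.grantedVersion);
    }
    return 0;
}
```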
//
@ -927,6 +936,12 @@ typedef struct
//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present in order to dynamically allocate a correctly sized array, since the
// total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
typedef struct
@ -934,6 +949,7 @@ typedef struct
NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
NvU32 count; // IN/OUT
NV_STATUS rmStatus; // OUT
NvU32 version; // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
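The query pattern noted above (tablePtr == 0 and count == 0 to learn the processor count) lends itself to a two-call sequence. A hedged sketch follows; the device path and header name are assumptions, and error handling is intentionally minimal.

```c
/* Hedged sketch of the two-call pattern: query the processor count first,
 * then allocate a table of that size and fetch it. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "uvm_ioctl.h"  /* assumed name of the header diffed here */

int main(void)
{
    UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS params;
    NvProcessorUuid *table;
    int fd = open("/dev/nvidia-uvm-tools", O_RDWR);  /* assumed device node */

    if (fd < 0)
        return 1;

    /* tablePtr == 0 and count == 0: ask how many processors are present. */
    memset(&params, 0, sizeof(params));
    if (ioctl(fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0)
        return 1;

    /* 'count' now holds the total; keep it non-zero and point tablePtr at
     * an array of that size to get the full (V2) table back. */
    table = calloc(params.count, sizeof(*table));
    if (!table)
        return 1;
    params.tablePtr = (NvU64)(uintptr_t)table;

    if (ioctl(fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) == 0)
        printf("got %u processor UUIDs (event version %u)\n",
               (unsigned)params.count, (unsigned)params.version);

    free(table);
    return 0;
}
```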
@ -979,7 +995,7 @@ typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

View File

@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define UVM_IS_CONFIG_HMM() 0
#endif
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit

View File

@ -633,8 +633,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *owning_gpu = NULL;
uvm_gpu_t *gpu;
uvm_gpu_t *owning_gpu;
if (mem_info->egm)
UVM_ASSERT(mem_info->sysmem);
@ -653,16 +652,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// registered.
// This also checks for if EGM owning GPU is registered.
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
owning_gpu = gpu;
break;
}
}
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@ -954,6 +944,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
goto error;
}
// Check for the maximum page size for the mapping of vidmem allocations, as
// the vMMU segment size may limit the range of page sizes.
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
mem_info.pageSize = mapping_page_size;
status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
@ -989,7 +985,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
if (uvm_api_range_invalid_4k(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
uvm_va_space_down_read_rm(va_space);

View File

@ -86,7 +86,7 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
// Only map those pages that are not already mapped on destination
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
prot = uvm_va_block_page_compute_highest_permission(va_block, va_block_context, dest_id, page_index);
if (prot == UVM_PROT_NONE)
continue;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -149,6 +149,26 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
return NV_OK;
}
// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
// There may be scenarios where the GMMU must use a subset of the supported
// page sizes, e.g., to comply with the vMMU supported page sizes due to
// segmentation sizes.
if (aperture == UVM_APERTURE_VID) {
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
}
else {
return 1 << __fls(tree->hal->page_sizes());
}
}
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -856,7 +876,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
if (!page_tree_ats_init_required(tree))
return NV_OK;
page_size = uvm_mmu_biggest_page_size(tree);
page_size = mmu_biggest_page_size(tree, UVM_APERTURE_VID);
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
@ -1090,6 +1110,8 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
tree->gpu_va_space = gpu_va_space;
tree->big_page_size = big_page_size;
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
page_tree_set_location(tree, location);
uvm_tracker_init(&tree->tracker);
@ -2301,7 +2323,7 @@ NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
UVM_ASSERT(!uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
size = UVM_ALIGN_UP(gpu->mem_info.max_allocatable_address + 1, page_size);
UVM_ASSERT(page_size);
@ -2338,9 +2360,9 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
return NV_OK;
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
aperture = uvm_gpu_peer_aperture(gpu, peer);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
phys_offset = 0ULL;
@ -2783,7 +2805,7 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
// sysmem mappings with 128K entries.
UVM_ASSERT(is_power_of_2(mapping_size));
UVM_ASSERT(mapping_size >= UVM_SIZE_1GB);
UVM_ASSERT(mapping_size >= uvm_mmu_biggest_page_size(&gpu->address_space_tree));
UVM_ASSERT(mapping_size >= mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS));
UVM_ASSERT(mapping_size <= flat_sysmem_va_size);
flat_sysmem_va_size = UVM_ALIGN_UP(flat_sysmem_va_size, mapping_size);
@ -2828,7 +2850,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
if (sysmem_mapping->range_vec == NULL) {
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
NvU64 phys_offset = curr_pa;
NvU32 page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
uvm_pmm_alloc_flags_t pmm_flags;
// No eviction is requested when allocating the page tree storage,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -612,6 +612,9 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
}
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
// the largest page size supported in a given system, which considers the GMMU
// and vMMU page sizes and segment sizes.
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
{
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
@ -642,11 +645,6 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
return page_size;
}
static NvU32 uvm_mmu_biggest_page_size(uvm_page_tree_t *tree)
{
return 1 << __fls(tree->hal->page_sizes());
}
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
{
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));

View File

@ -1442,6 +1442,7 @@ static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_locatio
static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
page_thrashing_info_t *page_thrashing,
uvm_processor_id_t requester)
@ -1460,7 +1461,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
if (uvm_va_block_is_hmm(va_block)) {
// HMM pages always start out resident on the CPU but may not be
// recorded in the va_block state because hmm_range_fault() or
@ -1601,6 +1602,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
// that case we keep the page pinned while applying the same algorithm as in
// Phase1.
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester)
{
@ -1713,6 +1715,7 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
else {
hint = get_hint_for_migration_thrashing(va_space_thrashing,
va_block,
va_block_context,
page_index,
page_thrashing,
requester);

View File

@ -74,7 +74,9 @@ typedef struct
} uvm_perf_thrashing_hint_t;
// Obtain a hint to prevent thrashing on the page with given address
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block, NvU64 address,
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester);
// Obtain a pointer to a mask with the processors that are thrashing on the

View File

@ -1408,8 +1408,6 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(accessing_gpu, gpu);
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (peer_caps->is_indirect_peer ||
(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)) {
// Indirect peers are accessed as sysmem addresses, so they don't need

View File

@ -1082,6 +1082,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
{
uvm_va_range_t *va_range;
uvm_va_block_t *va_block = NULL;
uvm_va_block_context_t *va_block_context = NULL;
NvU32 num_blocks;
NvU32 index = 0;
uvm_gpu_phys_address_t phys_addr = {0};
@ -1099,9 +1100,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
}
TEST_CHECK_RET(va_block);
va_block_context = uvm_va_block_context_alloc(NULL);
TEST_CHECK_RET(va_block_context);
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, 0, gpu->id), gpu->id);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
if (is_resident) {
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
@ -1109,6 +1113,8 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
uvm_mutex_unlock(&va_block->lock);
uvm_va_block_context_free(va_block_context);
TEST_CHECK_RET(is_resident);
// Perform the lookup for the whole root chunk

View File

@ -25,6 +25,8 @@
#include "uvm_processors.h"
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
const uvm_processor_mask_t g_uvm_processor_mask_empty = { };
NV_STATUS uvm_processor_mask_cache_init(void)
{

View File

@ -522,6 +522,9 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
uvm_processor_id_t, \
uvm_id_from_value)
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@ -567,6 +570,10 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
(uvm_id_value(i) < uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
i = uvm_gpu_id_next(i))
// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))

View File

@ -41,15 +41,11 @@
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
{
NvU64 retained_count = 0;
uvm_parent_gpu_t *parent_gpu;
uvm_gpu_t *gpu = NULL;
uvm_mutex_lock(&g_uvm_global.global_lock);
parent_gpu = uvm_parent_gpu_get_by_uuid(&params->gpu_uuid);
if (parent_gpu)
gpu = uvm_gpu_get_by_parent_and_swizz_id(parent_gpu, params->swizz_id);
gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
if (gpu != NULL)
retained_count = uvm_gpu_retained_count(gpu);

View File

@ -40,7 +40,6 @@ typedef struct
{
// In params
NvProcessorUuid gpu_uuid;
NvU32 swizz_id;
// Out params
NvU64 ref_count NV_ALIGN_BYTES(8);
NV_STATUS rmStatus;
@ -192,7 +191,7 @@ typedef struct
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
NvProcessorUuid preferred_location; // Out
NvS32 preferred_cpu_nid; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 accessed_by_count; // Out
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
union
@ -505,7 +504,12 @@ typedef struct
typedef struct
{
// In params
UvmEventEntry entry; // contains only NvUxx types
union
{
UvmEventEntry_V1 entry_v1; // contains only NvUxx types
UvmEventEntry_V2 entry_v2; // contains only NvUxx types
};
NvU32 version;
NvU32 count;
// Out param
@ -620,7 +624,7 @@ typedef struct
// Array of processors which have a resident copy of the page containing
// lookup_address.
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 resident_on_count; // Out
// If the memory is resident on the CPU, the NUMA node on which the page
@ -631,24 +635,24 @@ typedef struct
// system-page-sized portion of this allocation which contains
// lookup_address is guaranteed to be resident on the corresponding
// processor.
NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
// The physical address of the physical allocation backing lookup_address.
NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
// Array of processors which have a virtual mapping covering lookup_address.
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
NvU32 mapped_on_count; // Out
// The size of the virtual mapping covering lookup_address on each
// mapped_on processor.
NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
// Array of processors which have physical memory populated that would back
// lookup_address if it was resident.
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 populated_on_count; // Out
NV_STATUS rmStatus; // Out

File diff suppressed because it is too large

View File

@ -52,8 +52,19 @@ typedef enum
typedef unsigned long long UvmStream;
#define UVM_MAX_GPUS NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS (UVM_MAX_GPUS + 1)
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
#define UVM_MAX_GPUS_V1 NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2 (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
#define UVM_INIT_FLAGS_DISABLE_HMM ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
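
To make the UVM_PROCESSOR_MASK_SIZE arithmetic above concrete, here is a minimal sketch of the same ceiling division; the device and subdevice limits below are placeholders, since the real NV_MAX_DEVICES and NV_MAX_SUBDEVICES values come from the NVIDIA headers and are not restated here.

```c
#include <stdio.h>

/* Placeholder limits for illustration only. */
#define EXAMPLE_MAX_DEVICES      32
#define EXAMPLE_MAX_SUBDEVICES    8

#define EXAMPLE_MAX_GPUS_V2       (EXAMPLE_MAX_DEVICES * EXAMPLE_MAX_SUBDEVICES)
#define EXAMPLE_MAX_PROCESSORS_V2 (EXAMPLE_MAX_GPUS_V2 + 1)               /* + the CPU */
#define EXAMPLE_MASK_WORDS        ((EXAMPLE_MAX_PROCESSORS_V2 + 63) / 64) /* 64 bits per NvU64 */

int main(void)
{
    /* With the placeholder values: 257 processors round up to 5 NvU64 words. */
    printf("processors: %d, mask words: %d\n",
           EXAMPLE_MAX_PROCESSORS_V2, EXAMPLE_MASK_WORDS);
    return 0;
}
```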
@ -152,6 +163,8 @@ typedef enum {
typedef struct
{
// UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
// or the GPU instance UUID of the partition.
NvProcessorUuid gpuUuid;
NvU32 gpuMappingType; // UvmGpuMappingType
NvU32 gpuCachingType; // UvmGpuCachingType
@ -410,7 +423,29 @@ typedef struct
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo;
} UvmEventCpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be 1st argument of this structure. Setting eventType to
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
//
NvU8 eventType;
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets.
//
NvU16 padding16Bits;
NvS32 nid; // NUMA node ID of faulting CPU
NvU64 address; // faulting address
NvU64 timeStamp; // cpu time when the fault occurred
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;
typedef enum
{
@ -567,7 +602,49 @@ typedef struct
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo;
} UvmEventMigrationInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure. Setting eventType
// to UvmEventTypeMigration helps to identify event data in a queue.
//
NvU8 eventType;
//
// Cause that triggered the migration
//
NvU8 migrationCause;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16Bits;
//
// Indices are used for the source and destination of migration instead of
// using gpu uuid/cpu id. This reduces the size of each event. The index to
// gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
// Currently we do not distinguish between CPUs so they all use index 0.
//
NvU16 srcIndex; // source CPU/GPU index
NvU16 dstIndex; // destination CPU/GPU index
NvS32 srcNid; // source CPU NUMA node ID
NvS32 dstNid; // destination CPU NUMA node ID
NvU64 address; // base virtual addr used for migration
NvU64 migratedBytes; // number of bytes migrated
NvU64 beginTimeStamp; // cpu time stamp when the memory transfer
// was queued on the gpu
NvU64 endTimeStamp; // cpu time stamp when the memory transfer
// finalization was communicated to the cpu
// For asynchronous operations this field
// will be zero
NvU64 rangeGroupId; // range group tied with this migration
NvU64 beginTimeStampGpu; // time stamp when the migration started
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo_V2;
typedef enum
{
@ -633,7 +710,64 @@ typedef struct
//
NvU8 padding8Bits;
NvU16 padding16Bits;
} UvmEventGpuFaultInfo;
} UvmEventGpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFault helps to identify event data in
// a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_1;
union
{
NvU16 gpcId; // If this is a replayable fault, this field contains
// the physical GPC index where the fault was
// triggered
NvU16 channelId; // If this is a non-replayable fault, this field
// contains the id of the channel that launched the
// operation that caused the fault.
//
// TODO: Bug 3283289: this field is ambiguous for
// Ampere+ GPUs, but it is never consumed by clients.
};
NvU16 clientId; // Id of the MMU client that triggered the fault. This
// is the value provided by HW and is architecture-
// specific. There are separate client ids for
// different client types (See dev_fault.h).
NvU64 address; // virtual address at which gpu faulted
NvU64 timeStamp; // time stamp when the cpu started processing the
// fault
NvU64 timeStampGpu; // gpu time stamp when the fault entry was written
// in the fault buffer
NvU32 batchId; // Per-GPU unique id to identify the faults serviced
// in batch before:
// - Issuing a replay for replayable faults
// - Re-scheduling the channel for non-replayable
// faults.
NvU8 clientType; // Volta+ GPUs can fault on clients other than GR.
// UvmEventFaultClientTypeGpc indicates replayable
// fault, while UvmEventFaultClientTypeHub indicates
// non-replayable fault.
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_2;
NvU16 gpuIndex; // GPU that experienced the fault
} UvmEventGpuFaultInfo_V2;
//------------------------------------------------------------------------------
// This info is provided when a gpu fault is replayed (for replayable faults)
@ -666,7 +800,25 @@ typedef struct
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo;
} UvmEventGpuFaultReplayInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
// data in a queue.
//
NvU8 eventType;
NvU8 clientType; // See clientType in UvmEventGpuFaultInfo
NvU16 gpuIndex; // GPU that experienced the fault
NvU32 batchId; // Per-GPU unique id to identify the faults that
// have been serviced in batch
NvU64 timeStamp; // cpu time when the replay of the faulting memory
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo_V2;
//------------------------------------------------------------------------------
// This info is provided per fatal fault
@ -689,7 +841,26 @@ typedef struct
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo;
} UvmEventFatalFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeFatalFault helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType. Only
// valid if processorIndex is a GPU
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
NvU8 reason; // reason why the fault is fatal, refer
// UvmEventFatalReason
NvU16 processorIndex; // processor that experienced the fault
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V2;
typedef struct
{
@ -718,7 +889,38 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInfo;
} UvmEventReadDuplicateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicate helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size in bytes of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region becomes
// read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies in which processors this
// memory region is read-duplicated. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventReadDuplicateInfo_V2;
typedef struct
{
@ -728,13 +930,13 @@ typedef struct
// identify event data in a queue.
//
NvU8 eventType;
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
@ -746,8 +948,34 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo;
} UvmEventReadDuplicateInvalidateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to
// identify event data in a queue.
//
NvU8 eventType;
NvU8 padding8bits;
NvU16 residentIndex;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region is no
// longer read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo_V2;
typedef struct
{
@ -770,7 +998,30 @@ typedef struct
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo;
} UvmEventPageSizeChangeInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypePageSizeChange helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 processorIndex; // cpu/gpu processor index for which the page size
// changed
NvU32 size; // new page size
NvU64 address; // virtual address of the page whose size has
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo_V2;
typedef struct
{
@ -794,7 +1045,33 @@ typedef struct
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
} UvmEventThrashingDetectedInfo;
} UvmEventThrashingDetectedInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrashingDetected helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies which processors are
// fighting for this memory region. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventThrashingDetectedInfo_V2;
typedef struct
{
@ -815,7 +1092,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo;
} UvmEventThrottlingStartInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingStart helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo_V2;
typedef struct
{
@ -836,7 +1134,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo;
} UvmEventThrottlingEndInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingEnd helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo_V2;
typedef enum
{
@ -892,7 +1211,36 @@ typedef struct
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo;
} UvmEventMapRemoteInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeMapRemote helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells
// the cause for the page to be mapped remotely
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu being remapped
NvU16 dstIndex; // index of the cpu/gpu memory that contains the
// memory region data
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when all the required operations
// have been pushed to the processor
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo_V2;
typedef struct
{
@ -918,7 +1266,33 @@ typedef struct
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo;
} UvmEventEvictionInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeEviction helps to identify event data
// in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu from which data is being
// evicted
NvU16 dstIndex; // index of the cpu/gpu memory to which data is
// going to be stored
NvU64 addressOut; // virtual address of the memory region that is
// being evicted
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo_V2;
// TODO: Bug 1870362: [uvm] Provide virtual address and processor index in
// AccessCounter events
@ -978,7 +1352,44 @@ typedef struct
NvU32 bank;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;
} UvmEventTestAccessCounterInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeAccessCounter helps to identify event
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;
NvU32 bank;
NvU32 padding32bits;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo_V2;
typedef struct
{
@ -998,30 +1409,64 @@ typedef struct
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo cpuFault;
UvmEventMigrationInfo migration;
UvmEventGpuFaultInfo gpuFault;
UvmEventGpuFaultReplayInfo gpuFaultReplay;
UvmEventFatalFaultInfo fatalFault;
UvmEventReadDuplicateInfo readDuplicate;
UvmEventReadDuplicateInvalidateInfo readDuplicateInvalidate;
UvmEventPageSizeChangeInfo pageSizeChange;
UvmEventThrashingDetectedInfo thrashing;
UvmEventThrottlingStartInfo throttlingStart;
UvmEventThrottlingEndInfo throttlingEnd;
UvmEventMapRemoteInfo mapRemote;
UvmEventEvictionInfo eviction;
UvmEventCpuFaultInfo_V1 cpuFault;
UvmEventMigrationInfo_V1 migration;
UvmEventGpuFaultInfo_V1 gpuFault;
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
UvmEventFatalFaultInfo_V1 fatalFault;
UvmEventReadDuplicateInfo_V1 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V1 pageSizeChange;
UvmEventThrashingDetectedInfo_V1 thrashing;
UvmEventThrottlingStartInfo_V1 throttlingStart;
UvmEventThrottlingEndInfo_V1 throttlingEnd;
UvmEventMapRemoteInfo_V1 mapRemote;
UvmEventEvictionInfo_V1 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo accessCounter;
UvmEventTestAccessCounterInfo_V1 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry;
} UvmEventEntry_V1;
typedef struct
{
union
{
union
{
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo_V2 cpuFault;
UvmEventMigrationInfo_V2 migration;
UvmEventGpuFaultInfo_V2 gpuFault;
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
UvmEventFatalFaultInfo_V2 fatalFault;
UvmEventReadDuplicateInfo_V2 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V2 pageSizeChange;
UvmEventThrashingDetectedInfo_V2 thrashing;
UvmEventThrottlingStartInfo_V2 throttlingStart;
UvmEventThrottlingEndInfo_V2 throttlingEnd;
UvmEventMapRemoteInfo_V2 mapRemote;
UvmEventEvictionInfo_V2 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo_V2 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry_V2;
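
To show how a consumer tells these entries apart, here is a minimal sketch that dispatches on the leading eventType byte of a V2 entry. It assumes the UVM tools header that defines UvmEventEntry_V2 and the UvmEventType* enumerators is included; handle_event_v2 is a hypothetical helper and only two of the event kinds are handled.

```c
#include <stdio.h>
/* Assumes the UVM tools header defining UvmEventEntry_V2 and the
 * UvmEventType* enumerators has been included. */

static void handle_event_v2(const UvmEventEntry_V2 *e)
{
    switch (e->eventData.eventType) {
    case UvmEventTypeGpuFault:
        /* gpuIndex identifies the faulting GPU directly in V2 entries. */
        printf("GPU %u fault at 0x%llx\n",
               (unsigned)e->eventData.gpuFault.gpuIndex,
               (unsigned long long)e->eventData.gpuFault.address);
        break;
    case UvmEventTypeMigration:
        printf("migration %u -> %u, %llu bytes\n",
               (unsigned)e->eventData.migration.srcIndex,
               (unsigned)e->eventData.migration.dstIndex,
               (unsigned long long)e->eventData.migration.migratedBytes);
        break;
    default:
        /* Other event kinds are decoded the same way from their V2 structs. */
        break;
    }
}
```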
//------------------------------------------------------------------------------
// Type of time stamp used in the event entry:
@ -1060,7 +1505,12 @@ typedef enum
UvmDebugAccessTypeWrite = 1,
} UvmDebugAccessType;
typedef struct UvmEventControlData_tag {
typedef enum {
UvmToolsEventQueueVersion_V1 = 1,
UvmToolsEventQueueVersion_V2 = 2,
} UvmToolsEventQueueVersion;
typedef struct UvmEventControlData_V1_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
@ -1070,7 +1520,30 @@ typedef struct UvmEventControlData_tag {
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData;
} UvmToolsEventControlData_V1;
typedef struct UvmEventControlData_V2_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
// entries between put_ahead and put_behind are currently being written
volatile NvU32 put_ahead;
volatile NvU32 put_behind;
// The version values are limited to UvmToolsEventQueueVersion and
// initialized by UvmToolsCreateEventQueue().
NvU32 version;
NvU32 padding32Bits;
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V2;
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
typedef UvmEventEntry_V1 UvmEventEntry;
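
A related sketch: selecting the entry layout from the control structure. This assumes the queue was created as a V2 queue by UvmToolsCreateEventQueue(), since a V1 control structure has no version field to inspect; event_entry_size is a hypothetical helper.

```c
#include <stddef.h>
/* Assumes the UVM tools header defining the control data and entry types. */

static size_t event_entry_size(const UvmToolsEventControlData_V2 *ctrl)
{
    /* The version field is written by UvmToolsCreateEventQueue(), so this
     * check is only meaningful for queues created with the V2+ layout. */
    if (ctrl->version == UvmToolsEventQueueVersion_V2)
        return sizeof(UvmEventEntry_V2);

    return sizeof(UvmEventEntry_V1);
}
```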
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions

File diff suppressed because it is too large

View File

@ -706,11 +706,6 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);
// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@ -1546,7 +1541,11 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
//
// LOCKING: The caller must hold the va_block lock
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_mem_t *dst,
NvU64 src,
size_t size);
// Initialize va block retry tracking
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
@ -2090,11 +2089,14 @@ void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
// Count how many processors have a copy of the given page resident in their
// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index);
// Get the processor with a resident copy of a page closest to the given
// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor);
@ -2127,6 +2129,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
int nid,
uvm_page_index_t page_index);
// Return the CPU chunk for the given page_index from the first available NUMA
// node in the va_block. Should only be called for HMM va_blocks.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_any_chunk_for_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Return the struct page * from the chunk corresponding to the given page_index
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
@ -2241,6 +2248,7 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
// Return the maximum mapping protection for processor_id that will not require
// any permission revocation on the rest of the processors.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index);

View File

@ -175,6 +175,14 @@ typedef struct
// Scratch node mask. This follows the same rules as scratch_page_mask.
nodemask_t scratch_node_mask;
// Available as scratch space for the internal APIs. This is like a caller-
// save register: it shouldn't be used across function calls which also take
// this va_block_context.
uvm_processor_mask_t scratch_processor_mask;
// Temporary mask in block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// State used by uvm_va_block_make_resident
struct uvm_make_resident_context_struct
{
@ -233,6 +241,16 @@ typedef struct
// are removed as the operation progresses.
uvm_page_mask_t revoke_running_page_mask;
// Mask used by block_gpu_split_2m and block_gpu_split_big to track
// splitting of big PTEs but they are never called concurrently. This
// mask can be used concurrently with other page masks.
uvm_page_mask_t big_split_page_mask;
// Mask used by block_unmap_gpu to track non_uvm_lite_gpus which have
// this block mapped. This mask can be used concurrently with other page
// masks.
uvm_processor_mask_t non_uvm_lite_gpus;
uvm_page_mask_t page_mask;
uvm_page_mask_t filtered_page_mask;
uvm_page_mask_t migratable_mask;
@ -276,6 +294,10 @@ typedef struct
struct vm_area_struct *vma;
#if UVM_IS_CONFIG_HMM()
// Temporary mask used in uvm_hmm_block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
#endif
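
The caller-save rule for the scratch masks above is easy to get wrong, so here is a minimal sketch of the convention. example_scratch_user and helper_that_takes_context are hypothetical names; only the lifetime rule is illustrated, under the assumption that the UVM headers defining these types are included.

```c
/* Hypothetical callee that also receives the same va_block_context. */
static void helper_that_takes_context(uvm_va_block_t *block, uvm_va_block_context_t *ctx);

static void example_scratch_user(uvm_va_block_t *block, uvm_va_block_context_t *ctx)
{
    uvm_processor_mask_t *scratch = &ctx->scratch_processor_mask;

    /* Fine: use the scratch mask freely within this function. */
    /* ... populate 'scratch' with the uvm_processor_mask_* helpers ... */

    /* Any callee that also receives 'ctx' may reuse the scratch mask. */
    helper_that_takes_context(block, ctx);

    /* Wrong: the contents of 'scratch' computed before the call above can
     * no longer be relied on; recompute them here if they are still needed. */
}
```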

View File

@ -1799,7 +1799,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (uvm_api_range_invalid(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -86,11 +86,13 @@ static void init_tools_data(uvm_va_space_t *va_space)
for (i = 0; i < ARRAY_SIZE(va_space->tools.counters); i++)
INIT_LIST_HEAD(va_space->tools.counters + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues); i++)
INIT_LIST_HEAD(va_space->tools.queues + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v1); i++)
INIT_LIST_HEAD(va_space->tools.queues_v1 + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v2); i++)
INIT_LIST_HEAD(va_space->tools.queues_v2 + i);
}
static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
{
uvm_gpu_t *other_gpu;
@ -104,7 +106,7 @@ static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1) {
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1 || gpu->parent == other_gpu->parent) {
NV_STATUS status = enable_peers(va_space, gpu, other_gpu);
if (status != NV_OK)
return status;
@ -324,10 +326,16 @@ static void unregister_gpu(uvm_va_space_t *va_space,
}
}
if (gpu->parent->isr.replayable_faults.handling)
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->non_faultable_processors, gpu->id);
}
processor_mask_array_clear(va_space->can_access, gpu->id, gpu->id);
processor_mask_array_clear(va_space->can_access, gpu->id, UVM_ID_CPU);
@ -514,7 +522,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
}
// Check that all CPU/GPU affinity masks are empty
@ -604,7 +612,7 @@ uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProces
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid))
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
@ -663,7 +671,8 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
NvBool *numa_enabled,
NvS32 *numa_node_id)
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out)
{
NV_STATUS status;
uvm_va_range_t *va_range;
@ -675,13 +684,15 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
if (status != NV_OK)
return status;
uvm_uuid_copy(uuid_out, &gpu->uuid);
// Enabling access counters requires taking the ISR lock, so it is done
// without holding the (deeper order) VA space lock. Enabling the counters
// after dropping the VA space lock would create a window of time in which
// another thread could see the GPU as registered, but access counters would
// be disabled. Therefore, the counters are enabled before taking the VA
// space lock.
if (uvm_gpu_access_counters_required(gpu->parent)) {
if (uvm_parent_gpu_access_counters_required(gpu->parent)) {
status = uvm_gpu_access_counters_enable(gpu, va_space);
if (status != NV_OK) {
uvm_gpu_release(gpu);
@ -726,10 +737,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->faultable_processors, gpu->id);
UVM_ASSERT(!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
// System-wide atomics are enabled by default
uvm_processor_mask_set(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->non_faultable_processors, gpu->id);
}
// All GPUs have native atomics on their own memory
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
@ -785,7 +803,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
}
}
status = register_gpu_nvlink_peers(va_space, gpu);
status = register_gpu_peers(va_space, gpu);
if (status != NV_OK)
goto cleanup;
@ -822,9 +840,9 @@ done:
if (status != NV_OK) {
// There is no risk of disabling access counters on a previously
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registed.
if (uvm_gpu_access_counters_required(gpu->parent))
uvm_gpu_access_counters_disable(gpu, va_space);
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_release(gpu);
}
@ -876,15 +894,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// it from the VA space until we're done.
uvm_va_space_up_read_rm(va_space);
// If uvm_gpu_access_counters_required(gpu->parent) is true, a concurrent
// registration could enable access counters after they are disabled here.
// If uvm_parent_gpu_access_counters_required(gpu->parent) is true, a
// concurrent registration could enable access counters after they are
// disabled here.
// The concurrent registration will fail later on if it acquires the VA
// space lock before the unregistration does (because the GPU is still
// registered) and undo the access counters enablement, or succeed if it
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@ -1040,6 +1059,10 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
}
}
else if (gpu0->parent == gpu1->parent) {
processor_mask_array_set(va_space->has_native_atomics, gpu0->id, gpu1->id);
processor_mask_array_set(va_space->has_native_atomics, gpu1->id, gpu0->id);
}
UVM_ASSERT(va_space_check_processors_masks(va_space));
__set_bit(table_index, va_space->enabled_peers);
@ -1091,6 +1114,7 @@ static NV_STATUS retain_pcie_peers_from_uuids(uvm_va_space_t *va_space,
static bool uvm_va_space_pcie_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
return !processor_mask_array_test(va_space->has_nvlink, gpu0->id, gpu1->id) &&
gpu0->parent != gpu1->parent &&
uvm_va_space_peer_enabled(va_space, gpu0, gpu1);
}

View File

@ -163,6 +163,10 @@ struct uvm_va_space_struct
// faults.
uvm_processor_mask_t faultable_processors;
// Mask of processors registered with the va space that don't support
// faulting.
uvm_processor_mask_t non_faultable_processors;
// This is a count of non fault capable processors with a GPU VA space
// registered.
NvU32 num_non_faultable_gpu_va_spaces;
@ -261,8 +265,8 @@ struct uvm_va_space_struct
// Mask of processors that are participating in system-wide atomics
uvm_processor_mask_t system_wide_atomics_enabled_processors;
// Mask of GPUs where access counters are enabled on this VA space
uvm_processor_mask_t access_counters_enabled_processors;
// Mask of physical GPUs where access counters are enabled on this VA space
uvm_parent_processor_mask_t access_counters_enabled_processors;
// Array with information regarding CPU/GPU NUMA affinity. There is one
// entry per CPU NUMA node. Entries in the array are populated sequentially
@ -308,7 +312,8 @@ struct uvm_va_space_struct
// Lists of counters listening for events on this VA space
struct list_head counters[UVM_TOTAL_COUNTERS];
struct list_head queues[UvmEventNumTypesAll];
struct list_head queues_v1[UvmEventNumTypesAll];
struct list_head queues_v2[UvmEventNumTypesAll];
// Node for this va_space in global subscribers list
struct list_head node;
@ -399,7 +404,7 @@ static void uvm_va_space_processor_uuid(uvm_va_space_t *va_space, NvProcessorUui
else {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
UVM_ASSERT(gpu);
memcpy(uuid, uvm_gpu_uuid(gpu), sizeof(*uuid));
memcpy(uuid, &gpu->uuid, sizeof(*uuid));
}
}
@ -472,9 +477,9 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space);
uvm_mutex_unlock(&(__va_space)->serialize_writers_lock); \
} while (0)
// Get a registered gpu by uuid. This restricts the search for GPUs, to those that
// have been registered with a va_space. This returns NULL if the GPU is not present, or not
// registered with the va_space.
// Get a registered gpu by uuid. This restricts the search for GPUs, to those
// that have been registered with a va_space. This returns NULL if the GPU is
// not present, or not registered with the va_space.
//
// LOCKING: The VA space lock must be held.
uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
@ -501,13 +506,19 @@ bool uvm_va_space_can_read_duplicate(uvm_va_space_t *va_space, uvm_gpu_t *changi
// Register a gpu in the va space
// Note that each gpu can be only registered once in a va space
//
// The input gpu_uuid is for the physical GPU. The user_rm_va_space argument
// identifies the SMC partition if provided and SMC is enabled.
//
// This call returns whether the GPU memory is a NUMA node in the kernel and the
// corresponding node id.
// It also returns the GI UUID (if gpu_uuid is an SMC partition) or a copy of
// gpu_uuid if the GPU is not SMC capable or SMC is not enabled.
NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_va_space,
NvBool *numa_enabled,
NvS32 *numa_node_id);
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out);
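
A minimal sketch of a call site updated for the new out parameter; the surrounding variables (params, user_rm_va_space, status) are illustrative rather than taken from the actual ioctl handler.

```c
/* Hypothetical call site: register a GPU and capture the UUID the VA space
 * will use for it (the GI UUID for SMC partitions, otherwise a copy of the
 * physical GPU UUID). */
NvProcessorUuid registered_uuid;
NvBool numa_enabled;
NvS32 numa_node_id;
NV_STATUS status;

status = uvm_va_space_register_gpu(va_space,
                                   &params->gpu_uuid,
                                   &user_rm_va_space,
                                   &numa_enabled,
                                   &numa_node_id,
                                   &registered_uuid);
if (status != NV_OK)
    return status;
```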
// Unregister a gpu from the va space
NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);

View File

@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -53,6 +53,7 @@
(defined(CONFIG_CRYPTO_HMAC) || defined(CONFIG_CRYPTO_HMAC_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDH) || defined(CONFIG_CRYPTO_ECDH_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDSA) || defined(CONFIG_CRYPTO_ECDSA_MODULE)) && \
(defined(CONFIG_CRYPTO_RSA) || defined(CONFIG_CRYPTO_RSA_MODULE)) && \
(defined(CONFIG_X509_CERTIFICATE_PARSER) || defined(CONFIG_X509_CERTIFICATE_PARSER_MODULE))
#define NV_CONFIG_CRYPTO_PRESENT 1
#endif
@ -151,4 +152,17 @@ bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public,
bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_pkcs1_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
bool lkca_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
#endif

View File

@ -0,0 +1,611 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
#ifdef USE_LKCA
#include <linux/module.h>
#include <linux/mpi.h>
#include <linux/random.h>
#include <crypto/akcipher.h>
#include <crypto/internal/rsa.h>
/* ------------------------ Macros & Defines ------------------------------- */
#define GET_MOST_SIGNIFICANT_BIT(keySize) (keySize > 0 ? ((keySize - 1) & 7) : 0)
#define GET_ENC_MESSAGE_SIZE_BYTE(keySize) (((keySize) + 7) >> 3)
#define PKCS1_MGF1_COUNTER_SIZE_BYTE (4)
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
#define RSA_PSS_TRAILER_FIELD (0xbc)
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
#define BITS_TO_BYTES(b) (b >> 3)
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };
struct rsa_ctx
{
struct rsa_key key;
bool pub_key_set;
bool priv_key_set;
int size;
};
#endif // #ifdef USE_LKCA
/*!
* Creating and initializing a RSA context.
*
* @return : A void pointer points to a RSA context
*
*/
void *libspdm_rsa_new
(
void
)
{
#ifndef USE_LKCA
return NULL;
#else
struct rsa_ctx *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
{
return NULL;
}
memset(ctx, 0, sizeof(*ctx));
ctx->pub_key_set = false;
ctx->priv_key_set = false;
return ctx;
#endif
}
/*!
* To free a RSA context.
*
* @param rsa_context : A RSA context pointer
*
*/
void libspdm_rsa_free
(
void *rsa_context
)
{
#ifdef USE_LKCA
struct rsa_ctx *ctx = rsa_context;
if (ctx != NULL)
{
if (ctx->key.n) kfree(ctx->key.n);
if (ctx->key.e) kfree(ctx->key.e);
if (ctx->key.d) kfree(ctx->key.d);
if (ctx->key.q) kfree(ctx->key.q);
if (ctx->key.p) kfree(ctx->key.p);
if (ctx->key.dq) kfree(ctx->key.dq);
if (ctx->key.dp) kfree(ctx->key.dp);
if (ctx->key.qinv) kfree(ctx->key.qinv);
kfree(ctx);
}
#endif
}
#define rsa_set_key_case(a, a_sz, A) \
case A: \
{ \
if (ctx->key.a) { \
kfree(ctx->key.a); \
} \
ctx->key.a = shadow_num; \
ctx->key.a_sz = bn_size; \
break; \
}
/*!
* To set key into RSA context.
*
* @param rsa_context : A RSA context pointer
* @param key_tag : Indicate key tag for RSA key
* @param big_number : A big nuMber buffer to store rsa KEY
* @param bn_size : The size of bug number
*
* @Return : True if OK; otherwise return False
*/
bool libspdm_rsa_set_key
(
void *rsa_context,
const libspdm_rsa_key_tag_t key_tag,
const uint8_t *big_number,
size_t bn_size
)
{
#ifndef USE_LKCA
return false;
#else
struct rsa_ctx *ctx = rsa_context;
uint8_t *shadow_num;
if (ctx == NULL)
{
return false;
}
// Quick sanity check if tag is valid
switch (key_tag)
{
case LIBSPDM_RSA_KEY_N:
case LIBSPDM_RSA_KEY_E:
case LIBSPDM_RSA_KEY_D:
case LIBSPDM_RSA_KEY_Q:
case LIBSPDM_RSA_KEY_P:
case LIBSPDM_RSA_KEY_DP:
case LIBSPDM_RSA_KEY_DQ:
case LIBSPDM_RSA_KEY_Q_INV:
break;
default:
return false;
break;
}
if (big_number != NULL)
{
shadow_num = kmalloc(bn_size, GFP_KERNEL);
if (shadow_num == NULL)
{
return false;
}
memcpy(shadow_num, big_number, bn_size);
}
else
{
shadow_num = NULL;
bn_size = 0;
}
switch (key_tag)
{
rsa_set_key_case(n, n_sz, LIBSPDM_RSA_KEY_N)
rsa_set_key_case(e, e_sz, LIBSPDM_RSA_KEY_E)
rsa_set_key_case(d, d_sz, LIBSPDM_RSA_KEY_D)
rsa_set_key_case(q, q_sz, LIBSPDM_RSA_KEY_Q)
rsa_set_key_case(p, p_sz, LIBSPDM_RSA_KEY_P)
rsa_set_key_case(dq, dq_sz, LIBSPDM_RSA_KEY_DQ)
rsa_set_key_case(dp, dp_sz, LIBSPDM_RSA_KEY_DP)
rsa_set_key_case(qinv, qinv_sz, LIBSPDM_RSA_KEY_Q_INV)
default:
// We can't get here ever
break;
}
return true;
#endif
}
/*!
* Perform PKCS1 MGF1 operation.
*
* @param mask : A mask pointer to store return data
* @param maskedDB_length : Indicate mask data block length
* @param seed : A seed pointer to store random values
* @param seed_length : The seed length
* @param hash_nid : The hash NID
*
* @Return : True if OK; otherwise return False
*/
static bool NV_PKCS1_MGF1
(
uint8_t *mask,
size_t maskedDB_length,
const uint8_t *seed,
size_t seed_length,
size_t hash_nid
)
{
#ifndef USE_LKCA
return false;
#else
size_t mdLength;
size_t counter;
size_t outLength;
uint8_t counterBuf[4];
void *sha384_ctx = NULL;
uint8_t hash_value[LIBSPDM_SHA384_DIGEST_SIZE];
bool status = false;
if (mask == NULL || seed == NULL)
{
return false;
}
// Only support SHA384 for MGF1 now.
if (hash_nid == LIBSPDM_CRYPTO_NID_SHA384)
{
mdLength = LIBSPDM_SHA384_DIGEST_SIZE;
}
else
{
return false;
}
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed \n", __FUNCTION__);
return false;
}
for (counter = 0, outLength = 0; outLength < maskedDB_length; counter++)
{
counterBuf[0] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 24);
counterBuf[1] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 16);
counterBuf[2] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 8);
counterBuf[3] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 0);
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s: libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, seed, seed_length);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(seed) !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, counterBuf, 4);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(counterBuf) !! \n", __FUNCTION__);
goto _error_exit;
}
if (outLength + mdLength <= maskedDB_length)
{
status = libspdm_sha384_final(sha384_ctx, mask + outLength);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed (<= maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
outLength += mdLength;
}
else
{
status = libspdm_sha384_final(sha384_ctx, hash_value);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed(> maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
memcpy(mask + outLength, hash_value, maskedDB_length - outLength);
outLength = maskedDB_length;
}
}
status = true;
_error_exit:
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
/*
0xbc : Trailer Field
+-----------+
| M |
+-----------+
|
V
Hash
|
V
+--------+----------+----------+
M' = |Padding1| mHash | salt |
+--------+----------+----------+
|--------------|---------------|
|
+--------+----------+ V
DB = |Padding2| salt | Hash
+--------+----------+ |
| |
V |
xor <--- MGF <---|
| |
| |
V V
+-------------------+----------+----+
EM = | maskedDB | H |0xbc|
+-------------------+----------+----+
salt : The random number; its size is hardcoded to the hash size here.
M' : The concatenation of padding1 + message hash + salt
MGF : Mask generation function.
A mask generation function takes an octet string of variable length
and a desired output length as input, and outputs an octet string of
the desired length
MGF1 is a Mask Generation Function based on a hash function.
Padding1 : 8 zeros
Padding2 : 0x01
The detailed spec is at https://datatracker.ietf.org/doc/html/rfc2437
*/
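/*
 * Worked size example (a sketch, assuming SHA-384 and a salt length equal to
 * the digest size, as enforced below): for a 3072-bit modulus,
 *
 *     emLength        = 3072 / 8            = 384 bytes
 *     maskedDB_length = emLength - 48 - 1   = 335 bytes
 *     EM              = maskedDB (335) || H (48) || 0xbc (1)
 *
 * ignoring the one-byte adjustment made when msBits is zero.
 */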
/*!
* Perform EMSA-PSS encoding (using NV_PKCS1_MGF1) and the RSA private-key operation to generate the signature.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size : The size of the message hash in bytes
* @param signature : The pointer used to store the generated signature
* @param sig_size : On input, a pointer storing the signature buffer size.
* On output, a pointer storing the generated signature size.
* @param salt_length : The salt length for the RSA-PSS algorithm
*
* @Return : True if OK; otherwise return False
*/
static bool nvRsaPaddingAddPkcs1PssMgf1
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size,
int salt_length
)
{
#ifndef USE_LKCA
return false;
#else
bool status = false;
struct rsa_ctx *ctx = rsa_context;
void *sha384_ctx = NULL;
uint32_t keySize;
uint32_t msBits;
size_t emLength;
uint8_t saltBuf[64];
size_t maskedDB_length;
size_t i;
uint8_t *tmp_H;
uint8_t *tmp_P;
int rc;
unsigned int ret_data_size;
MPI mpi_n = NULL;
MPI mpi_d = NULL;
MPI mpi_c = mpi_alloc(0);
MPI mpi_p = mpi_alloc(0);
// read modulus to BN struct
mpi_n = mpi_read_raw_data(ctx->key.n, ctx->key.n_sz);
if (mpi_n == NULL)
{
pr_err("%s : mpi_n create failed !! \n", __FUNCTION__);
goto _error_exit;
}
// read private exponent to BN struct
mpi_d = mpi_read_raw_data(ctx->key.d, ctx->key.d_sz);
if (mpi_d == NULL)
{
pr_err("%s : mpi_d create failed !! \n", __FUNCTION__);
goto _error_exit;
}
keySize = mpi_n->nbits;
msBits = GET_MOST_SIGNIFICANT_BIT(keySize);
emLength = BITS_TO_BYTES(keySize);
if (msBits == 0)
{
*signature++ = 0;
emLength--;
}
if (emLength < hash_size + 2)
{
pr_err("%s : emLength < hash_size + 2 !! \n", __FUNCTION__);
goto _error_exit;
}
// Now, we only support salt_length == LIBSPDM_SHA384_DIGEST_SIZE
if (salt_length != LIBSPDM_SHA384_DIGEST_SIZE ||
hash_nid != LIBSPDM_CRYPTO_NID_SHA384)
{
pr_err("%s : Invalid salt_length (%x) \n", __FUNCTION__, salt_length);
goto _error_exit;
}
get_random_bytes(saltBuf, salt_length);
maskedDB_length = emLength - hash_size - 1;
tmp_H = signature + maskedDB_length;
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s : libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, zeroes, sizeof(zeroes));
if (!status)
{
pr_err("%s : libspdm_sha384_update() with zeros failed !!\n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, message_hash, hash_size);
if (!status)
{
pr_err("%s: libspdm_sha384_update() with message_hash failed !!\n", __FUNCTION__);
goto _error_exit;
}
if (salt_length)
{
status = libspdm_sha384_update(sha384_ctx, saltBuf, salt_length);
if (!status)
{
pr_err("%s : libspdm_sha384_update() with saltBuf failed !!\n", __FUNCTION__);
goto _error_exit;
}
}
status = libspdm_sha384_final(sha384_ctx, tmp_H);
if (!status)
{
pr_err("%s : libspdm_sha384_final() with tmp_H failed !!\n", __FUNCTION__);
goto _error_exit;
}
/* Generate dbMask in place then perform XOR on it */
status = NV_PKCS1_MGF1(signature, maskedDB_length, tmp_H, hash_size, hash_nid);
if (!status)
{
pr_err("%s : NV_PKCS1_MGF1() failed \n", __FUNCTION__);
goto _error_exit;
}
tmp_P = signature;
tmp_P += emLength - salt_length - hash_size - 2;
*tmp_P++ ^= 0x1;
if (salt_length > 0)
{
for (i = 0; i < salt_length; i++)
{
*tmp_P++ ^= saltBuf[i];
}
}
if (msBits)
{
signature[0] &= 0xFF >> (8 - msBits);
}
/* H is already in place so just set final 0xbc */
signature[emLength - 1] = RSA_PSS_TRAILER_FIELD;
// read signature to BN struct
mpi_p = mpi_read_raw_data(signature, emLength);
if (mpi_p == NULL)
{
pr_err("%s : mpi_p() create failed !!\n", __FUNCTION__);
goto _error_exit;
}
// Start the RSA private-key operation (modular exponentiation) over the encoded message.
rc = mpi_powm(mpi_c, mpi_p, mpi_d, mpi_n);
if (rc != 0)
{
pr_err("%s : mpi_powm() failed \n", __FUNCTION__);
goto _error_exit;
}
rc = mpi_read_buffer(mpi_c, signature, *sig_size, &ret_data_size, NULL);
if (rc != 0)
{
pr_err("%s : mpi_read_buffer() failed \n", __FUNCTION__);
goto _error_exit;
}
if (ret_data_size > *sig_size)
{
goto _error_exit;
}
*sig_size = ret_data_size;
status = true;
_error_exit:
mpi_free(mpi_n);
mpi_free(mpi_d);
mpi_free(mpi_c);
mpi_free(mpi_p);
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
/*!
* Perform the RSA-PSS signature signing process with the LKCA library.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size : The size of the message hash in bytes
* @param signature : The pointer used to store the generated signature
* @param sig_size : On input, a pointer storing the signature buffer size.
* On output, a pointer storing the generated signature size.
*
* @Return : True if OK; otherwise return False
*/
bool lkca_rsa_pss_sign
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size
)
{
#ifndef USE_LKCA
return true;
#else
return nvRsaPaddingAddPkcs1PssMgf1(rsa_context,
hash_nid,
message_hash,
hash_size,
signature,
sig_size,
LIBSPDM_SHA384_DIGEST_SIZE);
#endif
}
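/*
 * Illustrative call sequence (hypothetical caller, not part of this file):
 * sign a SHA-384 digest with a context previously populated via
 * libspdm_rsa_set_key(). The signature buffer size is an assumption.
 */
#if 0
static bool example_sign_digest(void *rsa_ctx,
                                const uint8_t digest[LIBSPDM_SHA384_DIGEST_SIZE])
{
    uint8_t sig[512];                /* assumed large enough for the modulus */
    size_t  sig_size = sizeof(sig);

    return lkca_rsa_pss_sign(rsa_ctx, LIBSPDM_CRYPTO_NID_SHA384,
                             digest, LIBSPDM_SHA384_DIGEST_SIZE,
                             sig, &sig_size);
}
#endif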

View File

@ -0,0 +1,85 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
*/
/** @file
* RSA Asymmetric Cipher Wrapper Implementation.
*
* This file implements following APIs which provide more capabilities for RSA:
* 1) rsa_pss_sign
*
* RFC 8017 - PKCS #1: RSA Cryptography Specifications version 2.2
**/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
/**
* Carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme.
*
* This function carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme defined in
* RSA PKCS#1 v2.2.
*
* The salt length is the same as the digest length.
*
* If the signature buffer is too small to hold the contents of signature, false
* is returned and sig_size is set to the required buffer size to obtain the signature.
*
* If rsa_context is NULL, then return false.
* If message_hash is NULL, then return false.
* hash_size must match the hash_nid. The NID can be SHA256, SHA384, SHA512, SHA3_256, SHA3_384 or SHA3_512.
* If sig_size is large enough but signature is NULL, then return false.
*
* @param[in] rsa_context Pointer to RSA context for signature generation.
* @param[in] hash_nid hash NID
* @param[in] message_hash Pointer to octet message hash to be signed.
* @param[in] hash_size size of the message hash in bytes.
* @param[out] signature Pointer to buffer to receive RSA-SSA PSS signature.
* @param[in, out] sig_size On input, the size of signature buffer in bytes.
* On output, the size of data returned in signature buffer in bytes.
*
* @retval true signature successfully generated in RSA-SSA PSS.
* @retval false signature generation failed.
* @retval false sig_size is too small.
*
**/
bool libspdm_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size)
{
return lkca_rsa_pss_sign(rsa_context, hash_nid, message_hash, hash_size,
signature, sig_size);
}
//
// In RM, we only need the sign process, so we stub out the verification function.
// The verification function is needed in GSP code only.
//
bool libspdm_rsa_pss_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size)
{
return false;
}

View File

@ -0,0 +1,153 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nv-linux.h"
extern int NVreg_ImexChannelCount;
static int nv_caps_imex_open(struct inode *inode, struct file *file)
{
return 0;
}
static int nv_caps_imex_release(struct inode *inode, struct file *file)
{
return 0;
}
static struct file_operations g_nv_caps_imex_fops =
{
.owner = THIS_MODULE,
.open = nv_caps_imex_open,
.release = nv_caps_imex_release
};
struct
{
NvBool initialized;
struct cdev cdev;
dev_t devno;
} g_nv_caps_imex;
int NV_API_CALL nv_caps_imex_channel_get(int fd)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
struct file *file;
struct inode *inode;
int channel = -1;
file = fget(fd);
if (file == NULL)
{
return channel;
}
inode = NV_FILE_INODE(file);
if (inode == NULL)
{
goto out;
}
/* Make sure the fd belongs to the nv-caps-imex-drv */
if (file->f_op != &g_nv_caps_imex_fops)
{
goto out;
}
/* minor number is same as channel */
channel = MINOR(inode->i_rdev);
out:
fput(file);
return channel;
#else
return -1;
#endif
}
int NV_API_CALL nv_caps_imex_channel_count(void)
{
return NVreg_ImexChannelCount;
}
int NV_API_CALL nv_caps_imex_init(void)
{
int rc;
if (g_nv_caps_imex.initialized)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex is already initialized.\n");
return -EBUSY;
}
if (NVreg_ImexChannelCount == 0)
{
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
return 0;
}
rc = alloc_chrdev_region(&g_nv_caps_imex.devno, 0,
NVreg_ImexChannelCount,
"nvidia-caps-imex-channels");
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to create cdev.\n");
return rc;
}
cdev_init(&g_nv_caps_imex.cdev, &g_nv_caps_imex_fops);
g_nv_caps_imex.cdev.owner = THIS_MODULE;
rc = cdev_add(&g_nv_caps_imex.cdev, g_nv_caps_imex.devno,
NVreg_ImexChannelCount);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to add cdev.\n");
goto cdev_add_fail;
}
g_nv_caps_imex.initialized = NV_TRUE;
return 0;
cdev_add_fail:
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
return rc;
}
void NV_API_CALL nv_caps_imex_exit(void)
{
if (!g_nv_caps_imex.initialized)
{
return;
}
cdev_del(&g_nv_caps_imex.cdev);
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
g_nv_caps_imex.initialized = NV_FALSE;
}
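/*
 * Illustrative flow (hypothetical, outside this driver): a CUDA process opens
 * one of the administrator-created channel nodes and hands the resulting fd
 * to the driver, which resolves it back to a channel number (the node's
 * minor) through nv_caps_imex_channel_get(), e.g.
 *
 *     int fd = open("/dev/nvidia-caps-imex-channels/channel0", O_RDWR);
 *     // in-kernel: nv_caps_imex_channel_get(fd) == 0
 */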

View File

@ -0,0 +1,34 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV_CAPS_IMEX_H_
#define _NV_CAPS_IMEX_H_
#include <nv-kernel-interface-api.h>
int NV_API_CALL nv_caps_imex_init(void);
void NV_API_CALL nv_caps_imex_exit(void);
int NV_API_CALL nv_caps_imex_channel_get(int fd);
int NV_API_CALL nv_caps_imex_channel_count(void);
#endif /* _NV_CAPS_IMEX_H_ */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -577,12 +577,9 @@ int nvidia_mmap_helper(
//
// This path is similar to the sysmem mapping code.
// TODO: Refactor is needed as part of bug#2001704.
// Use pfn_valid to determine whether the physical address has
// backing struct page. This is used to isolate P8 from P9.
//
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
!IS_REG_OFFSET(nv, access_start, access_len) &&
(pfn_valid(PFN_DOWN(mmap_start))))
!IS_REG_OFFSET(nv, access_start, access_len))
{
ret = nvidia_mmap_numa(vma, mmap_context);
if (ret)

View File

@ -839,6 +839,45 @@
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
/*
* Option: NVreg_ImexChannelCount
*
* Description:
*
* This option allows users to specify the number of IMEX (import/export)
* channels. Within an IMEX domain, the channels allow sharing memory
* securely in a multi-user environment using the CUDA driver's fabric handle
* based APIs.
*
* An IMEX domain is either an OS instance or a group of securely
* connected OS instances using the NVIDIA IMEX daemon. The option must
* be set to the same value on each OS instance within the IMEX domain.
*
* An IMEX channel is a logical entity that is represented by a /dev node.
* The IMEX channels are global resources within the IMEX domain. When
* exporter and importer CUDA processes have been granted access to the
* same IMEX channel, they can securely share memory.
*
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
* the related CUDA APIs will fail with an insufficient permission error until
* the /dev nodes are set up. The creation of these /dev nodes,
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
* administrator, where N is the minor number. The major number can be
* queried from /proc/devices.
*
* nvidia-modprobe CLI support is available to set up the /dev nodes.
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Possible values:
* 0 - Disable IMEX using CUDA driver's fabric handles.
* N - N IMEX channels will be enabled in the driver to facilitate N
* concurrent users. The default value is 2048 channels, and the current
* maximum is 2^20, the same as the Linux dev_t minor number limit.
*/
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
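/*
 * Example (illustrative, not from this file): the channel count is a regular
 * module parameter, so a deployment could pin it via a modprobe configuration
 * file, e.g.
 *
 *     options nvidia NVreg_ImexChannelCount=4096
 *
 * with 0 disabling IMEX channels entirely.
 */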
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
/*
*----------------registry database definition----------------------
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
{NULL, NULL}
};

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -55,6 +55,7 @@
#include "nv-kthread-q.h"
#include "nv-pat.h"
#include "nv-dmabuf.h"
#include "nv-caps-imex.h"
#if !defined(CONFIG_RETPOLINE)
#include "nv-retpoline.h"
@ -825,11 +826,18 @@ static int __init nvidia_init_module(void)
goto procfs_exit;
}
rc = nv_caps_imex_init();
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
goto caps_root_exit;
}
rc = nv_module_init(&sp);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
goto caps_root_exit;
goto caps_imex_exit;
}
count = nvos_count_devices();
@ -941,6 +949,9 @@ drivers_exit:
module_exit:
nv_module_exit(sp);
caps_imex_exit:
nv_caps_imex_exit();
caps_root_exit:
nv_caps_root_exit();
@ -967,6 +978,8 @@ static void __exit nvidia_exit_module(void)
nv_module_exit(sp);
nv_caps_imex_exit();
nv_caps_root_exit();
nv_procfs_exit();
@ -2040,7 +2053,7 @@ nvidia_close_callback(
{
nv_linux_state_t *nvl;
nv_state_t *nv;
nvidia_stack_t *sp;
nvidia_stack_t *sp = nvlfp->sp;
NvBool bRemove = NV_FALSE;
nvl = nvlfp->nvptr;
@ -2052,12 +2065,11 @@ nvidia_close_callback(
*/
nv_free_file_private(nvlfp);
nv_kmem_cache_free_stack(nvlfp->sp);
nv_kmem_cache_free_stack(sp);
return;
}
nv = NV_STATE_PTR(nvl);
sp = nvlfp->sp;
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
@ -6050,6 +6062,131 @@ failed:
return NV_ERR_NOT_SUPPORTED;
}
void NV_API_CALL nv_get_screen_info(
nv_state_t *nv,
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 *pFbSize
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs then fallback
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by nv_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
}
}
#else
{
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
int i;
if (pci_dev == NULL)
return;
BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
{
int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
struct resource *res = gpu_bar_res->child;
/*
* The console resource will become a child resource of the PCI device
* resource. Check if the child resource start address matches the expected
* console start address.
*/
if ((res != NULL) &&
NV_IS_CONSOLE_MAPPED(nv, res->start))
{
NvU32 res_name_len = strlen(res->name);
/*
* The resource name ends with 'fb' (efifb, vesafb, etc.).
* For simple-framebuffer, the resource name is 'BOOTFB'.
* Confirm that the resource name ends with either 'fb' or 'FB'.
*/
if ((res_name_len > 2) &&
!strcasecmp((res->name + res_name_len - 2), "fb"))
{
*pPhysicalAddress = res->start;
*pFbSize = resource_size(res);
return;
}
}
}
}
#endif
}
module_init(nvidia_init_module);
module_exit(nvidia_exit_module);

View File

@ -279,9 +279,11 @@ NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
char *methodStream,
NvU32 methodStreamSize);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);
// Interface used for CCSL

View File

@ -985,24 +985,30 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
}
EXPORT_SYMBOL(nvUvmInterfaceGetNonReplayableFaults);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device)
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, (gpuDeviceHandle)device);
status = rm_gpu_ops_flush_replayable_fault_buffer(sp,
pFaultInfo,
bCopyAndFlush);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceFlushReplayableFaultBuffer);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable)
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_toggle_prefetch_faults(sp, pFaultInfo, bEnable);
status = rm_gpu_ops_toggle_prefetch_faults(sp,
pFaultInfo,
bEnable);
nvUvmFreeSafeStack(sp);
return status;

View File

@ -30,18 +30,21 @@ NVIDIA_SOURCES += nvidia/nv-report-err.c
NVIDIA_SOURCES += nvidia/nv-rsync.c
NVIDIA_SOURCES += nvidia/nv-msi.c
NVIDIA_SOURCES += nvidia/nv-caps.c
NVIDIA_SOURCES += nvidia/nv-caps-imex.c
NVIDIA_SOURCES += nvidia/nv_uvm_interface.c
NVIDIA_SOURCES += nvidia/libspdm_aead.c
NVIDIA_SOURCES += nvidia/libspdm_ecc.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf.c
NVIDIA_SOURCES += nvidia/libspdm_rand.c
NVIDIA_SOURCES += nvidia/libspdm_shash.c
NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c
NVIDIA_SOURCES += nvidia/libspdm_rsa_ext.c
NVIDIA_SOURCES += nvidia/nvlink_linux.c
NVIDIA_SOURCES += nvidia/nvlink_caps.c
NVIDIA_SOURCES += nvidia/linux_nvswitch.c

View File

@ -25,6 +25,7 @@
#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"
#include "nv-time.h"
@ -59,6 +60,8 @@ NvBool os_dma_buf_enabled = NV_TRUE;
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER
NvBool os_imex_channel_is_supported = NV_TRUE;
void NV_API_CALL os_disable_console_access(void)
{
console_lock();
@ -1231,90 +1234,6 @@ NvBool NV_API_CALL os_is_efi_enabled(void)
return efi_enabled(EFI_BOOT);
}
void NV_API_CALL os_get_screen_info(
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 consoleBar1Address,
NvU64 consoleBar2Address
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
(registered_fb[i]->fix.smem_start == consoleBar2Address))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs then fallback
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by os_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if ((physAddr == consoleBar1Address) ||
(physAddr == consoleBar2Address))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
}
}
#endif
}
void NV_API_CALL os_dump_stack(void)
{
dump_stack();
@ -2182,6 +2101,22 @@ void NV_API_CALL os_nv_cap_close_fd
nv_cap_close_fd(fd);
}
NvS32 NV_API_CALL os_imex_channel_count
(
void
)
{
return nv_caps_imex_channel_count();
}
NvS32 NV_API_CALL os_imex_channel_get
(
NvU64 descriptor
)
{
return nv_caps_imex_channel_get((int)descriptor);
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -87,59 +87,10 @@ static NV_STATUS get_io_ptes(struct vm_area_struct *vma,
return NV_OK;
}
/*!
* @brief Pins user IO pages that have been mapped to the user processes virtual
* address space with remap_pfn_range.
*
* @param[in] vma VMA that contains the virtual address range given by the
* start and the page count.
* @param[in] start Beginning of the virtual address range of the IO pages.
* @param[in] page_count Number of pages to pin from start.
* @param[in,out] page_array Storage array for pointers to the pinned pages.
* Must be large enough to contain at least page_count
* pointers.
*
* @return NV_OK if the pages were pinned successfully, error otherwise.
*/
static NV_STATUS get_io_pages(struct vm_area_struct *vma,
NvUPtr start,
NvU64 page_count,
struct page **page_array)
{
NV_STATUS rmStatus = NV_OK;
NvU64 i, pinned = 0;
unsigned long pfn;
for (i = 0; i < page_count; i++)
{
if ((nv_follow_pfn(vma, (start + (i * PAGE_SIZE)), &pfn) < 0) ||
(!pfn_valid(pfn)))
{
rmStatus = NV_ERR_INVALID_ADDRESS;
break;
}
// Page-backed memory mapped to userspace with remap_pfn_range
page_array[i] = pfn_to_page(pfn);
get_page(page_array[i]);
pinned++;
}
if (pinned < page_count)
{
for (i = 0; i < pinned; i++)
put_page(page_array[i]);
rmStatus = NV_ERR_INVALID_ADDRESS;
}
return rmStatus;
}
NV_STATUS NV_API_CALL os_lookup_user_io_memory(
void *address,
NvU64 page_count,
NvU64 **pte_array,
void **page_array
NvU64 **pte_array
)
{
NV_STATUS rmStatus;
@ -187,18 +138,9 @@ NV_STATUS NV_API_CALL os_lookup_user_io_memory(
goto done;
}
if (pfn_valid(pfn))
{
rmStatus = get_io_pages(vma, start, page_count, (struct page **)result_array);
if (rmStatus == NV_OK)
*page_array = (void *)result_array;
}
else
{
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
}
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
done:
nv_mmap_read_unlock(mm);

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r551_06
#define NV_BUILD_BRANCH r551_40
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r551_06
#define NV_PUBLIC_BRANCH r551_40
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_06-132"
#define NV_BUILD_CHANGELIST_NUM (33773930)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_40-170"
#define NV_BUILD_CHANGELIST_NUM (33933991)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_06-132"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_40-170"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33933991)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r551_06-14"
#define NV_BUILD_CHANGELIST_NUM (33773930)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "551.23"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
#define NV_BUILD_BRANCH_VERSION "r551_40-13"
#define NV_BUILD_CHANGELIST_NUM (33924744)
#define NV_BUILD_TYPE "Nightly"
#define NV_BUILD_NAME "r551_40-240221"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33921227)
#define NV_BUILD_BRANCH_BASE_VERSION R550
#endif
// End buildmeister python edited section

View File

@ -94,8 +94,9 @@ static inline void NvTimeSemFermiSetMaxSubmitted(
NvTimeSemFermiSetMaxSubmittedVal(&report->timer, value);
}
static inline NvU64 NvTimeSemFermiGetPayload(
NvReportSemaphore32 *report)
static inline NvU64 NvTimeSemFermiGetPayloadVal(
volatile void *payloadPtr,
volatile void *maxSubmittedPtr)
{
// The ordering of the two operations below is critical. Other threads
// may be submitting GPU work that modifies the semaphore value, or
@ -129,11 +130,11 @@ static inline NvU64 NvTimeSemFermiGetPayload(
// adjust the max submitted value back down if a wrap occurs between these
// two operations, but has no way to bump the max submitted value up if a
// wrap occurs with the opposite ordering.
NvU64 current = report->payload;
NvU64 current = *(volatile NvU32*)payloadPtr;
// Use an atomic exchange to ensure the 64-bit read is atomic even on 32-bit
// CPUs.
NvU64 submitted = (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)&report->timer, 0ll, 0ll);
__NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr, 0ll, 0ll);
nvAssert(!(current & 0xFFFFFFFF00000000ull));
@ -152,6 +153,12 @@ static inline NvU64 NvTimeSemFermiGetPayload(
return current;
}
static inline NvU64 NvTimeSemFermiGetPayload(
NvReportSemaphore32 *report)
{
return NvTimeSemFermiGetPayloadVal(&report->payload, &report->timer);
}
static inline void NvTimeSemFermiSetPayload(
NvReportSemaphore32 *report,
const NvU64 payload)
@ -167,12 +174,19 @@ static inline void NvTimeSemFermiSetPayload(
* Volta and up.
*/
static inline NvU64 NvTimeSemVoltaGetPayloadVal(
volatile void *payloadPtr)
{
nvAssert(payloadPtr);
return (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)payloadPtr,
0, 0);
}
static inline NvU64 NvTimeSemVoltaGetPayload(
NvReportSemaphore64 *report)
{
return (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)&report->reportValue,
0, 0);
return NvTimeSemVoltaGetPayloadVal(&report->reportValue);
}
static inline void NvTimeSemVoltaSetPayload(

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "550.40.07"
#define NV_VERSION_STRING "550.54.14"
#else

View File

@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef GA100_DEV_CTXSW_PROG_H
#define GA100_DEV_CTXSW_PROG_H
#define NV_CTXSW_TIMESTAMP_BUFFER_RD_WR_POINTER 30:0 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE 31:31 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_ENABLED 0x1 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_DISABLED 0x0 /* */
#endif

View File

@ -123,9 +123,10 @@
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_FALSE 0x00000000 /* -WE-V */
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_TRUE 0x00000001 /* -W--V */
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_MAX_CACHELINE_SIZE 0x00000010 /* */
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_PRIV_MAILBOX_SCRATCH(i) (0x2100+(i)*4) /* RW-4A */
#endif // __ga100_dev_vm_h__

View File

@ -991,7 +991,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
pInfo->cc_white_y |= (p->Chromaticity[1] & NVT_PVT_EDID_CC_WHITE_Y1_Y0_MASK) >> NVT_PVT_EDID_CC_WHITE_Y1_Y0_SHIFT;
// copy established timings
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
pInfo->established_timings_1_2 |= (NvU16)p->bEstablishedTimings2;
// copy manuf reserved timings
@ -1039,7 +1039,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
p861Info = (k == 0) ? &pInfo->ext861 : &pInfo->ext861_2;
get861ExtInfo(pExt, sizeof(EDIDV1STRUC), p861Info);
// HF EEODB is present in EDID v1.3; v1.4 does not need this. Also, it is always present in the 1st CTA extension block.
if (j == 1 && pInfo->version == NVT_EDID_VER_1_3)
{
@ -1106,11 +1106,6 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
pInfo->ext_displayid20.interface_features.yuv420_min_pclk = 0;
}
if (pInfo->ext861.revision == 0 && pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
{
pInfo->ext861.revision = NVT_CEA861_REV_B;
}
if (pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
{
pInfo->ext861.basic_caps |= pInfo->ext_displayid20.basic_caps;
@ -1157,7 +1152,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
}
}
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
for (i = 0; i < pInfo->ext_displayid20.total_timings; i++)
{
if (!assignNextAvailableTiming(pInfo, &(pInfo->ext_displayid20.timing[i])))
@ -1215,7 +1210,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
CODE_SEGMENT(PAGE_DD_CODE)
void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
{
{
NvU32 i, j, data;
for (i = 0; i < pInfo->total_timings; i++)
@ -1226,8 +1221,8 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
case NVT_TYPE_HDMI_STEREO:
case NVT_TYPE_HDMI_EXT:
// VTB timing use the base EDID (block 0) to determine the color format support
case NVT_TYPE_EDID_VTB_EXT:
case NVT_TYPE_EDID_VTB_EXT_STD:
case NVT_TYPE_EDID_VTB_EXT:
case NVT_TYPE_EDID_VTB_EXT_STD:
case NVT_TYPE_EDID_VTB_EXT_DTD:
case NVT_TYPE_EDID_VTB_EXT_CVT:
// pInfo->u.feature_ver_1_3.color_type provides mono, rgb, rgy, undefined
@ -1245,7 +1240,7 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
}
updateBpcForTiming(pInfo, i);
break;
default:
default:
// * the displayID_v1.3/v2.0 EDID extension need to follow the EDID bpc definition.
// * all other default to base edid
updateBpcForTiming(pInfo, i);
@ -1319,7 +1314,7 @@ NvBool isMatchedStandardTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
for (j = 0; j < pInfo->total_timings; j++)
{
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
{
return NV_TRUE;
@ -1335,7 +1330,7 @@ NvBool isMatchedEstablishedTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
for (j = 0; j < pInfo->total_timings; j++)
{
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
{
return NV_TRUE;
@ -1405,7 +1400,7 @@ void updateBpcForTiming(NVT_EDID_INFO *pInfo, NvU32 index)
}
}
else if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_UNDEFINED) &&
p861Info->revision >= NVT_CEA861_REV_A)
{
@ -1462,7 +1457,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
// the timing mapping index :
//
// native_cta - the "native resoluiotn of the sink" in the CTA861.6 A Source shall override any other native video resolution indicators
// native_cta - the "native resoluiotn of the sink" in the CTA861.6 A Source shall override any other native video resolution indicators
// if the Source supports NVRDB and the NVRDB was found in the E-EDID
// preferred_cta - the "prefer SVD" in CTA-861-F (i.e. A Sink that prefers a Video Format that is not listed as an SVD in Video Data Block, but instead listed in YCBCR 4:2:0 VDB)
// preferred_displayid_dtd - the "prefer detailed timing of DispalyID" extension
@ -1546,7 +1541,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
if (native_cta == pEdidInfo->total_timings && NVT_NATIVE_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
{
native_cta = i;
}
}
if (preferred_cta == pEdidInfo->total_timings && NVT_PREFERRED_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
{
@ -2063,10 +2058,10 @@ NVT_STATUS NvTiming_GetEDIDBasedASPRTiming( NvU16 width, NvU16 height, NvU16 rr,
*
* @brief check EDID raw data is valid or not, and it will return the err flags if it existed
* @param pEdid : this is a pointer to EDID data
* @param length : read length of EDID
* @param length : read length of EDID
* @param bIsTrongValidation : true - added more check
* false- only header and checksum and size check
*
*
*/
CODE_SEGMENT(PAGE_DD_CODE)
NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongValidation)
@ -2086,12 +2081,12 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
return ret;
}
// check the EDID version and signature
// check the EDID version and signature
if (getEdidVersion(pEdid, &version) != NVT_STATUS_SUCCESS)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
return ret;
}
}
// check block 0 checksum value
if (!isChecksumValid(pEdid))
@ -2239,11 +2234,11 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
/**
*
* @brief sanity check EDID binary frequently used data block is valid or not,
* @brief sanity check EDID binary frequently used data block is valid or not,
* and it will return error checkpoint flag if it existed
* @param pEdid : this is a pointer to EDID raw data
* @param length : read length of EDID
*
*
*/
CODE_SEGMENT(PAGE_DD_CODE)
NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
@ -2255,7 +2250,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
DETAILEDTIMINGDESCRIPTOR *pDTD;
// For CTA861
NvU8 ctaDTD_Offset;
NvU8 *pData_collection;
NvU8 *pData_collection;
NvU32 ctaBlockTag, ctaPayload, vic;
// For DisplayID
DIDEXTENSION *pDisplayid;
@ -2283,7 +2278,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
}
// 18bytes in DTD or Display Descriptor check
for (i = 0; i < NVT_EDID_MAX_LONG_DISPLAY_DESCRIPTOR; i++)
{
@ -2313,7 +2308,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
}
}
else
{
{
pLdd = (EDID_LONG_DISPLAY_DESCRIPTOR *)&p->DetailedTimingDesc[i];
// This block is a display descriptor, validate
@ -2327,7 +2322,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
NvU8 max_v_rate_offset, min_v_rate_offset, max_h_rate_offset, min_h_rate_offset;
// add 255Hz offsets as needed before doing the check, use descriptor->rsvd2
nvt_assert(!(pLdd->rsvd2 & 0xF0));
nvt_assert(!(pLdd->rsvd2 & 0xF0));
max_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MAX ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
min_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MIN ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
@ -2340,19 +2335,19 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
pRangeLimit->maxHRate == 0)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_RANGE_LIMIT);
}
}
}
}
}
// extension and size check
if ((NvU32)(p->bExtensionFlag + 1) * sizeof(EDIDV1STRUC) > length)
{
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT);
}
// we shall not trust any extension blocks with wrong input EDID size
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
// we shall not trust any extension blocks with wrong input EDID size
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT))
return ret;
@ -2384,7 +2379,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
// validate SVD block
ctaBlockTag = NVT_CEA861_GET_SHORT_DESCRIPTOR_TAG(((EIA861EXTENSION *)pExt)->data[0]);
pData_collection = ((EIA861EXTENSION *)pExt)->data;
while ((ctaDTD_Offset - 4) > 0 && pData_collection != &pExt[ctaDTD_Offset] &&
ctaBlockTag > NVT_CEA861_TAG_RSVD && ctaBlockTag <= NVT_CEA861_TAG_EXTENDED_FLAG)
{
@ -2451,7 +2446,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DTD);
else
{
// check the max image size and
// check the max image size and
if (p->bMaxHorizImageSize != 0 && p->bMaxVertImageSize != 0)
{
NvU16 hDTDImageSize = (pDTD->bDTHorizVertImage & 0xF0) << 4 | pDTD->bDTHorizontalImage;
@ -2466,7 +2461,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if(!isChecksumValid(pExt))
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_CTA_CHECKSUM);
break;
break;
case NVT_EDID_EXTENSION_DISPLAYID:
pDisplayid = ((DIDEXTENSION *)pExt);
if (pDisplayid->ext_count != 0)
@ -2483,10 +2478,10 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
{
if ((pDisplayid->struct_version & 0xFF) == 0x21)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID_VERSION);
did2ExtCount++;
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_USE_CASE);
// check the DisplayId2 valid timing
@ -2506,7 +2501,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TYPE7);
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_RANGE_LIMITS)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_ADAPTIVE_SYNC)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_ADAPTIVE_SYNC);
@ -2527,9 +2522,9 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
}
// if the first tag failed, ignore all the tags afterward then
if (!bAllZero &&
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
if (!bAllZero &&
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_VENDOR_SPEC &&
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_CTA_DATA)) &&
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
@ -2537,7 +2532,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TAG);
continue;
}
}
}
else if ((pDisplayid->struct_version & 0xFF) == 0x12 || (pDisplayid->struct_version & 0xFF) == 0x13)
{
if ((pDisplayid->struct_version & 0xFF) == 0x13)
@ -2559,7 +2554,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if (pHeader->type == NVT_DISPLAYID_BLOCK_TYPE_RANGE_LIMITS)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
// add more data blocks tag here to evaluate
}
pData_collection += block_length;
@ -2580,7 +2575,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if (!bAllZero &&
pHeader->type > NVT_DISPLAYID_BLOCK_TYPE_TILEDDISPLAY &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_CTA_DATA &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID13_TAG);
@ -2939,7 +2934,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);
// Wipe out the checksums
CommonEDIDBuffer[CommonEDIDBuffer[1]+5/*mandatory bytes*/-1] = 0;
CommonEDIDBuffer[0xFF] = 0;
@ -2954,7 +2949,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
// displayId2 standalone uses 256 length sections
commonEDIDBufferSize = 256;
}
else
else
{
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);

View File

@ -111,7 +111,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
nvt_assert((timingIdx) <= COUNT(pInfo->timing));
if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->ext861.valid.H14B_VSDB || pInfo->ext861.valid.H20_HF_VSDB) && pInfo->ext861.revision >= NVT_CEA861_REV_A)
{
if (!pInfo->ext_displayid.supported_displayId2_0)
@ -153,7 +153,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_10b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_12b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_14b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
}
}
@ -174,7 +174,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_14b,
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_16b);
}
else
else
{
// yuv444
UPDATE_BPC_FOR_COLORFORMAT(pT->etc.yuv444, 0, /* yuv444 does not support 6bpc */
@ -264,7 +264,7 @@ static NVT_STATUS parseDisplayIdSection(DISPLAYID_SECTION * section,
* @brief Parses a displayID data block
* @param block The DisplayID data block to parse
* @param max_length The indicated total length of the each data block for checking
* @param pLength return the indicated length of the each data block
* @param pLength return the indicated length of the each data block
* @param pEdidInfo EDID struct containing DisplayID information and
* the timings or validation purpose if it is NULL
*/
@ -285,7 +285,7 @@ NVT_STATUS parseDisplayIdBlock(NvU8* pBlock,
return NVT_STATUS_ERR;
pInfo = pEdidInfo == NULL ? NULL : &pEdidInfo->ext_displayid;
*pLength = hdr->data_bytes + NVT_DISPLAYID_DATABLOCK_HEADER_LEN;
switch (hdr->type)
@ -386,9 +386,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
for (i = 0; i < prim_num; i++)
{
x_p = (blk->points)[i].color_x_bits_low +
x_p = (blk->points)[i].color_x_bits_low +
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
((blk->points)[i].color_y_bits_high << 4);
pInfo->primaries[i].x = x_p;
pInfo->primaries[i].y = y_p;
@ -396,9 +396,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
for (j = 0; j < wp_num; j++)
{
x_p = (blk->points)[i].color_x_bits_low +
x_p = (blk->points)[i].color_x_bits_low +
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
((blk->points)[i].color_y_bits_high << 4);
pInfo->white_points[pInfo->total_primaries + j].x = x_p;
pInfo->white_points[pInfo->total_primaries + j].y = y_p;
@ -508,7 +508,6 @@ static NVT_STATUS parseDisplayIdTiming1(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
CODE_SEGMENT(PAGE_DD_CODE)
static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR * type1, NVT_TIMING *pT)
{
NvU32 totalPixels_in_2_fields;
if (type1 == NULL || pT == NULL)
return NVT_STATUS_ERR;
@ -569,30 +568,17 @@ static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR
}
// the refresh rate
if (pT->interlaced)
{
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
// calculate the field rate in interlaced mode
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
}
else
{
// calculate frame rate in progressive mode
// in progressive mode filed = frame
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
}
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.name[39] = '\0';
pT->etc.rep = 0x1; // bit mask for no pixel repetition
pT->etc.status = NVT_STATUS_DISPLAYID_1;
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
// so we'll just update the preferred flag, not sequence them
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
pT->etc.flag |= type1->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
/* Fields currently not used. Uncomment them for future use
type1->options.stereo_support;
*/
@ -651,7 +637,6 @@ static NVT_STATUS parseDisplayIdTiming2(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
CODE_SEGMENT(PAGE_DD_CODE)
static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR * type2, NVT_TIMING *pT)
{
NvU32 totalPixels_in_2_fields;
if (type2 == NULL || pT == NULL)
return NVT_STATUS_ERR;
@ -679,32 +664,19 @@ static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR
pT->interlaced = type2->options.interface_frame_scanning_type;
// the refresh rate
if (pT->interlaced)
{
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
// calculate the field rate in interlaced mode
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
}
else
{
// calculate frame rate in progressive mode
// in progressive mode field = frame
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
}
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.aspect = 0;
pT->etc.name[39] = '\0';
pT->etc.rep = 0x1; // Bit mask for no pixel repetition
pT->etc.status = NVT_STATUS_DISPLAYID_2;
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
// so we'll just update the preferred flag, not sequence them
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
pT->etc.flag |= type2->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
/* Fields currently not used. Uncomment them for future use
type2->options.stereo_support;
*/
@ -861,12 +833,12 @@ static NVT_STATUS parseDisplayIdTiming5Descriptor(DISPLAYID_TIMING_5_DESCRIPTOR
{
NvU32 width, height, rr;
NvBool is1000div1001 = NV_FALSE;
// we don't handle the stereo type or custom reduced blanking yet
//NvU8 stereoType, formula;
//stereoType = (desc->optns & NVT_DISPLAYID_TIMING_5_STEREO_SUPPORT_MASK);
//formula = desc->optns & NVT_DISPLAYID_TIMING_5_FORMULA_SUPPORT_MASK;
if (desc->optns & NVT_DISPLAYID_TIMING_5_FRACTIONAL_RR_SUPPORT_MASK)
{
is1000div1001 = NV_TRUE;
@ -892,7 +864,7 @@ static NVT_STATUS parseDisplayIdTiming5(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
for (i = 0; i * sizeof(DISPLAYID_TIMING_5_DESCRIPTOR) < blk->header.data_bytes; i++)
{
NVMISC_MEMSET(&newTiming, 0, sizeof(newTiming));
if (parseDisplayIdTiming5Descriptor(blk->descriptors + i, &newTiming) == NVT_STATUS_SUCCESS)
{
if (pEdidInfo == NULL) continue;
@ -1030,7 +1002,7 @@ static NVT_STATUS parseDisplayIdRangeLimits(NvU8 * block, NVT_DISPLAYID_INFO *pI
rl = pInfo->range_limits + pInfo->rl_num;
(pInfo->rl_num)++;
rl->pclk_min = minPclk;
rl->pclk_max = maxPclk;
@ -1105,7 +1077,7 @@ static NVT_STATUS parseDisplayIdDeviceData(NvU8 * block, NVT_DISPLAYID_INFO *pIn
pInfo->device_op_mode = DRF_VAL(T_DISPLAYID, _DEVICE, _OPERATING_MODE, blk->operating_mode);
pInfo->support_backlight = DRF_VAL(T_DISPLAYID, _DEVICE, _BACKLIGHT, blk->operating_mode);
pInfo->support_intensity = DRF_VAL(T_DISPLAYID, _DEVICE, _INTENSITY, blk->operating_mode);
pInfo->horiz_pixel_count = blk->horizontal_pixel_count;
pInfo->vert_pixel_count = blk->vertical_pixel_count;
@ -1278,7 +1250,7 @@ static NVT_STATUS parseDisplayIdStereo(NvU8 * block, NVT_DISPLAYID_INFO *pInfo)
nvt_assert(0);
return NVT_STATUS_ERR;
}
return NVT_STATUS_SUCCESS;
}
@ -1322,7 +1294,7 @@ static NVT_STATUS parseDisplayIdTiledDisplay(NvU8 * block, NVT_DISPLAYID_INFO *p
pInfo->bezel_info.left = (blk->bezel_info.left * blk->bezel_info.pixel_density) / 10;
pInfo->tile_topology_id.vendor_id = (blk->topology_id.vendor_id[2] << 16) |
(blk->topology_id.vendor_id[1] << 8 ) |
(blk->topology_id.vendor_id[1] << 8 ) |
blk->topology_id.vendor_id[0];
pInfo->tile_topology_id.product_id = (blk->topology_id.product_id[1] << 8) | blk->topology_id.product_id[0];
@ -1350,7 +1322,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
if (pInfo == NULL) return NVT_STATUS_SUCCESS;
p861info = &pInfo->ext861;
pInfo->ext_displayid.cea_data_block_present = 1;
p861info->revision = blk->revision;
@ -1366,7 +1338,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
//parse HDR related information from the HDR static metadata data block
parseCea861HdrStaticMetadataDataBlock(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
// base video
parse861bShortTiming(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
// yuv420-only video
@ -1422,7 +1394,7 @@ static NVT_STATUS parseDisplayIdDisplayInterfaceFeatures(NvU8 * block, NVT_DISPL
// Minimum Pixel Rate at Which YCbCr 4:2:0 Encoding Is Supported
pInfo->u4.display_interface_features.minimum_pixel_rate_ycbcr420 = blk->minimum_pixel_rate_ycbcr420;
// Audio capability
pInfo->u4.display_interface_features.audio_capability.support_32khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_32KHZ, blk->supported_audio_capability);
pInfo->u4.display_interface_features.audio_capability.support_44_1khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_44_1KHZ, blk->supported_audio_capability);

View File

@ -235,7 +235,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
if (totalPixelsIn2Fields != 0)
{
rr = (NvU16)axb_div_c(pclk * 2, 10000, totalPixelsIn2Fields);
rr = (NvU16)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000, (NvU64)totalPixelsIn2Fields);
}
}
else
@ -244,7 +244,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
if (totalPixels != 0)
{
rr = (NvU16)axb_div_c(pclk, 10000, totalPixels);
rr = (NvU16)axb_div_c_64((NvU64)pclk, (NvU64)10000, (NvU64)totalPixels);
}
}
return rr;
@ -261,7 +261,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
if (totalPixelsIn2Fields != 0)
{
rrx1k = (NvU32)axb_div_c(pclk * 2, 10000000, totalPixelsIn2Fields);
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000000, (NvU64)totalPixelsIn2Fields);
}
}
else
@ -270,7 +270,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
if (totalPixels != 0)
{
rrx1k = (NvU32)axb_div_c(pclk, 10000000, totalPixels);
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk, (NvU64)10000000, (NvU64)totalPixels);
}
}
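The hunks above widen the refresh-rate arithmetic to 64 bits. A minimal sketch of the idea with a worked value follows; the helper name and the absence of rounding are illustrative assumptions, not the driver's actual axb_div_c_64.

#include <stdint.h>

/* Sketch only: compute (a * b) / c with a 64-bit intermediate so the product cannot wrap. */
static uint64_t axb_div_c_64_sketch(uint64_t a, uint64_t b, uint64_t c)
{
    return (a * b) / c;   /* caller guarantees c != 0 */
}

/*
 * Why the widening matters: the surrounding math implies pclk is kept in
 * 10 kHz units, so a ~1.33 GHz pixel clock is pclk = 133000, and the
 * intermediate 133000 * 10000000 = 1.33e12 is far above the 32-bit limit
 * of about 4.29e9. Doing the multiply in 64 bits keeps it from truncating.
 */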

View File

@ -70,8 +70,8 @@ extern "C" {
// Link Transition Timeouts in milliseconds
#define NVLINK_TRANSITION_OFF_TIMEOUT 1
#define NVLINK_TRANSITION_SAFE_TIMEOUT 300
#define NVLINK_TRANSITION_HS_TIMEOUT 8000
#define NVLINK_TRANSITION_SAFE_TIMEOUT 70
#define NVLINK_TRANSITION_HS_TIMEOUT 7000
#define NVLINK_TRANSITION_ACTIVE_PENDING 2000
#define NVLINK_TRANSITION_POST_HS_TIMEOUT 70

View File

@ -222,8 +222,7 @@ _cci_module_cable_detect
}
default:
{
NVSWITCH_ASSERT(0);
break;
return -NVL_ERR_NOT_SUPPORTED;
}
}
@ -348,8 +347,9 @@ _cci_module_identify
// Mark as faulty
device->pCci->isFaulty[moduleId] = NV_TRUE;
NVSWITCH_PRINT(device, ERROR,
"%s: Module HW check failed. Module %d\n", __FUNCTION__, moduleId);
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d faulty\n", moduleId);
return -NVL_ERR_GENERIC;
}
@ -612,6 +612,9 @@ _cci_module_identify_async
NvlStatus retval;
PCCI pCci = device->pCci;
CCI_MODULE_ONBOARD_STATE nextState;
CCI_MODULE_STATE *pOnboardState;
pOnboardState = &device->pCci->moduleState[moduleId];
nvswitch_os_memset(&nextState, 0, sizeof(CCI_MODULE_ONBOARD_STATE));
@ -637,8 +640,9 @@ _cci_module_identify_async
}
default:
{
// Not expected
NVSWITCH_ASSERT(0);
// Invalid cable type
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
break;
}
@ -646,6 +650,8 @@ _cci_module_identify_async
}
else
{
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
}

View File

@ -7727,11 +7727,11 @@ nvswitch_ctrl_get_err_info_lr10
}
// TODO NVidia TL not supported
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_ERRLOG_REG");
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_INTEN_REG");

View File

@ -1638,6 +1638,9 @@ nvswitch_cci_module_access_cmd_ls10
// Mark as faulty
device->pCci->isFaulty[osfp] = NV_TRUE;
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d access error\n", osfp);
return -NVL_IO_ERROR;
}

View File

@ -5549,6 +5549,29 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
}
}
static void
_nvswitch_dump_minion_ali_debug_registers_ls10
(
nvswitch_device *device,
NvU32 link
)
{
NVSWITCH_MINION_ALI_DEBUG_REGISTERS params;
nvlink_link *nvlink = nvswitch_get_link(device, link);
if ((nvlink != NULL) &&
(nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, &params) == NVL_SUCCESS))
{
NVSWITCH_PRINT(device, ERROR,
"%s: Minion error on link #%d!:\n"
"Minion DLSTAT MN00 = 0x%x\n"
"Minion DLSTAT UC01 = 0x%x\n"
"Minion DLSTAT UC01 = 0x%x\n",
__FUNCTION__, link,
params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr);
}
}
static void
_nvswitch_emit_link_errors_minion_fatal_ls10
(
@ -5611,6 +5634,8 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
enabledLinks &= ~bit;
regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData);
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void
@ -5647,8 +5672,8 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
{
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
break;
@ -5660,6 +5685,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
break;
}
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void

View File

@ -42,6 +42,11 @@
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
#include "ls10/minion_nvlink_defines_public_ls10.h"
#define NV_NVLINK_TLREQ_TIMEOUT_ACTIVE 10000
#define NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN 10
#define NV_NVLINK_TLREQ_TIMEOUT_RESET 4
#define NV_NVLINK_TLREQ_TIMEOUT_L2 5
static void
_nvswitch_configure_reserved_throughput_counters
(
@ -143,9 +148,9 @@ nvswitch_init_lpwr_regs_ls10
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to set L1 Threshold\n",
__FUNCTION__);
__FUNCTION__);
}
}
}
void
nvswitch_corelib_training_complete_ls10
@ -1433,7 +1438,7 @@ nvswitch_load_link_disable_settings_ls10
nvswitch_device *device,
nvlink_link *link
)
{
{
NvU32 regVal;
// Read state from NVLIPT HW
@ -1443,7 +1448,7 @@ nvswitch_load_link_disable_settings_ls10
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
{
NVSWITCH_ASSERT(!cciIsLinkManaged(device, link->linkNumber));
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
@ -1589,7 +1594,7 @@ nvswitch_reset_and_train_link_ls10
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
(link_intr_subcode == MINION_ALARM_BUSY))
{
status = nvswitch_request_tl_link_state_ls10(link,
@ -1683,6 +1688,39 @@ nvswitch_are_link_clocks_on_ls10
return NV_TRUE;
}
static
NvlStatus
_nvswitch_tl_request_get_timeout_value_ls10
(
nvswitch_device *device,
NvU32 tlLinkState,
NvU32 *timeoutVal
)
{
switch (tlLinkState)
{
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_ACTIVE:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_ACTIVE;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_RESET;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_L2:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_L2;
break;
default:
NVSWITCH_PRINT(device, ERROR,
"%s: Invalid tlLinkState %d provided!\n",
__FUNCTION__, tlLinkState);
return NVL_BAD_ARGS;
}
return NVL_SUCCESS;
}
NvlStatus
nvswitch_request_tl_link_state_ls10
(
@ -1696,6 +1734,9 @@ nvswitch_request_tl_link_state_ls10
NvU32 linkStatus;
NvU32 lnkErrStatus;
NvU32 bit;
NvU32 timeoutVal;
NVSWITCH_TIMEOUT timeout;
NvBool keepPolling;
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
{
@ -1729,17 +1770,43 @@ nvswitch_request_tl_link_state_ls10
if (bSync)
{
// Wait for the TL link state register to complete
status = nvswitch_wait_for_tl_request_ready_lr10(link);
// setup timeouts for the TL request
status = _nvswitch_tl_request_get_timeout_value_ls10(device, tlLinkState, &timeoutVal);
if (status != NVL_SUCCESS)
{
return status;
return NVL_ERR_INVALID_STATE;
}
nvswitch_timeout_create(NVSWITCH_INTERVAL_1MSEC_IN_NS * timeoutVal, &timeout);
status = NVL_MORE_PROCESSING_REQUIRED;
do
{
keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) ==
tlLinkState)
{
status = NVL_SUCCESS;
break;
}
nvswitch_os_sleep(1);
}
while(keepPolling);
// Do one final check if the polling loop didn't see the target linkState
if (status == NVL_MORE_PROCESSING_REQUIRED)
{
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
tlLinkState)
{
@ -1750,6 +1817,8 @@ nvswitch_request_tl_link_state_ls10
}
}
}
return status;
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -36,7 +36,36 @@ extern "C" {
#define RUSD_TIMESTAMP_WRITE_IN_PROGRESS (NV_U64_MAX)
#define RUSD_TIMESTAMP_INVALID 0
#define RUSD_SEQ_DATA_VALID(x) ((((NvU32)(x)) & 0x1U) == 0)
// seq = c_0 * b_0 + c_1 * (b_0 - 1) where c_0 == open_count and c_1 == close_count
// When they are equal, data is valid, otherwise data is being written.
// b_0 == 1 mod (b_0 - 1) and b_0 - 1 == (-1) mod b_0
// So, c_0 == seq mod (b_0 - 1) and c_1 == (-1 * seq) mod b_0
// c_1 cannot be calculated quite so naively because negative modulos aren't fun, so we
// instead do c_1 == (b_0 - (seq mod b_0)) mod b_0
//
#define RUSD_SEQ_BASE_SHIFT 20llu
#define RUSD_SEQ_BASE0 (1llu << RUSD_SEQ_BASE_SHIFT)
#define RUSD_SEQ_BASE1 (RUSD_SEQ_BASE0 - 1llu)
#define RUSD_SEQ_COEFF1(x) ((RUSD_SEQ_BASE0 - ((x) % RUSD_SEQ_BASE0)) % RUSD_SEQ_BASE0)
#define RUSD_SEQ_COEFF0(x) ((x) % RUSD_SEQ_BASE1)
#define RUSD_SEQ_WRAP_SHIFT 18llu
#define RUSD_SEQ_WRAP_VAL (1llu << RUSD_SEQ_WRAP_SHIFT)
#define RUSD_SEQ_DATA_VALID(x) (RUSD_SEQ_COEFF0(x) == RUSD_SEQ_COEFF1(x))
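A quick worked check of the coefficient recovery described in the comment above, using illustrative counts only (this standalone program is not part of the header):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    const uint64_t b0 = 1ull << 20;                  /* RUSD_SEQ_BASE0          */
    const uint64_t open_count = 3, close_count = 3;  /* equal => data is valid  */
    const uint64_t seq = open_count * b0 + close_count * (b0 - 1);

    assert(seq % (b0 - 1) == open_count);            /* RUSD_SEQ_COEFF0(seq)    */
    assert((b0 - (seq % b0)) % b0 == close_count);   /* RUSD_SEQ_COEFF1(seq)    */
    return 0;
}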
//
// Helper macros to check seq before reading RUSD.
// Not wrapped in do/while because the macros rely on continue/break in the caller's loop
//
#define RUSD_SEQ_CHECK1(SHARED_DATA) \
NvU64 seq = (SHARED_DATA)->seq; \
portAtomicMemoryFenceLoad(); \
if (!RUSD_SEQ_DATA_VALID(seq)) \
continue;
#define RUSD_SEQ_CHECK2(SHARED_DATA) \
portAtomicMemoryFenceLoad(); \
if (seq == (SHARED_DATA)->seq) \
break;
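A minimal sketch of the reader pattern these helpers imply; the function name, the retry loop, and the choice of field are assumptions for illustration, and a production caller would likely bound the retries rather than spin forever:

static NvU64 rusdReadTotalPmaMemory(volatile NV00DE_SHARED_DATA *pSharedData)
{
    NvU64 totalPmaMemory = 0;

    while (NV_TRUE)
    {
        RUSD_SEQ_CHECK1(pSharedData);   // retry while a write is in flight

        totalPmaMemory = pSharedData->totalPmaMemory;

        RUSD_SEQ_CHECK2(pSharedData);   // accept the snapshot if seq is unchanged
    }

    return totalPmaMemory;
}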
enum {
RUSD_CLK_PUBLIC_DOMAIN_GRAPHICS = 0,
@ -166,10 +195,12 @@ typedef struct RUSD_INST_POWER_USAGE {
} RUSD_INST_POWER_USAGE;
typedef struct NV00DE_SHARED_DATA {
volatile NvU32 seq;
volatile NvU64 seq;
NvU32 bar1Size;
NvU32 bar1AvailSize;
NvU64 totalPmaMemory;
NvU64 freePmaMemory;
// GSP polling data section
NV_DECLARE_ALIGNED(RUSD_CLK_PUBLIC_DOMAIN_INFOS clkPublicDomainInfos, 8);

View File

@ -853,7 +853,8 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
} NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS;
/* vGPU capabilities */
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
#define NVA081_CTRL_VGPU_CAPABILITY_COMPUTE_MEDIA_ENGINE_GPU 1
/*
* NVA081_CTRL_CMD_VGPU_SET_CAPABILITY
@ -872,7 +873,7 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
* NV_ERR_OBJECT_NOT_FOUND
* NV_ERR_INVALID_ARGUMENT
*/
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID (0x1eU)
@ -881,4 +882,30 @@ typedef struct NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS {
NvBool state;
} NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS;
/*
* NVA081_CTRL_CMD_VGPU_GET_CAPABILITY
*
* This command is to get state of vGPU capability for the physical GPU.
*
* capability [IN]
* This param specifies the requested capability of the device that is to be queried
* One of NVA081_CTRL_VGPU_CAPABILITY* values
*
* state [OUT]
* This param specifies the state of the capability
*
* Possible status values returned are:
* NV_OK
* NV_ERR_OBJECT_NOT_FOUND
* NV_ERR_INVALID_ARGUMENT
*/
#define NVA081_CTRL_CMD_VGPU_GET_CAPABILITY (0xa081011f) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID (0x1fU)
typedef struct NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS {
NvU32 capability;
NvBool state;
} NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS;
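A hedged caller-side sketch of the new GET control; the client/object handles and the pRmApi->Control entry point are placeholders for whatever the caller already uses and are not defined by this header:

NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS params = { 0 };
NV_STATUS status;

params.capability = NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU;

status = pRmApi->Control(pRmApi, hClient, hVgpuConfigObject,
                         NVA081_CTRL_CMD_VGPU_GET_CAPABILITY,
                         &params, sizeof(params));
if (status == NV_OK)
{
    // params.state now reports whether the capability is enabled on this pGPU
}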
/* _ctrlA081vgpuconfig_h_ */

View File

@ -44,151 +44,6 @@ ENTRY(0x13BD, 0x11D6, 0x10de, "GRID M10-8A"),
ENTRY(0x13BD, 0x1286, 0x10de, "GRID M10-2B"),
ENTRY(0x13BD, 0x12EE, 0x10de, "GRID M10-2B4"),
ENTRY(0x13BD, 0x1339, 0x10de, "GRID M10-1B4"),
ENTRY(0x13F2, 0x114C, 0x10de, "GRID M60-0Q"),
ENTRY(0x13F2, 0x114D, 0x10de, "GRID M60-1Q"),
ENTRY(0x13F2, 0x114E, 0x10de, "GRID M60-2Q"),
ENTRY(0x13F2, 0x114F, 0x10de, "GRID M60-4Q"),
ENTRY(0x13F2, 0x1150, 0x10de, "GRID M60-8Q"),
ENTRY(0x13F2, 0x1176, 0x10de, "GRID M60-0B"),
ENTRY(0x13F2, 0x1177, 0x10de, "GRID M60-1B"),
ENTRY(0x13F2, 0x117D, 0x10de, "GRID M60-2B"),
ENTRY(0x13F2, 0x11AE, 0x10de, "GRID M60-1A"),
ENTRY(0x13F2, 0x11AF, 0x10de, "GRID M60-2A"),
ENTRY(0x13F2, 0x11B0, 0x10de, "GRID M60-4A"),
ENTRY(0x13F2, 0x11B1, 0x10de, "GRID M60-8A"),
ENTRY(0x13F2, 0x12EC, 0x10de, "GRID M60-2B4"),
ENTRY(0x13F2, 0x1337, 0x10de, "GRID M60-1B4"),
ENTRY(0x13F3, 0x117C, 0x10de, "GRID M6-2B"),
ENTRY(0x13F3, 0x117E, 0x10de, "GRID M6-0B"),
ENTRY(0x13F3, 0x117F, 0x10de, "GRID M6-1B"),
ENTRY(0x13F3, 0x1180, 0x10de, "GRID M6-0Q"),
ENTRY(0x13F3, 0x1181, 0x10de, "GRID M6-1Q"),
ENTRY(0x13F3, 0x1182, 0x10de, "GRID M6-2Q"),
ENTRY(0x13F3, 0x1183, 0x10de, "GRID M6-4Q"),
ENTRY(0x13F3, 0x1184, 0x10de, "GRID M6-8Q"),
ENTRY(0x13F3, 0x11AA, 0x10de, "GRID M6-1A"),
ENTRY(0x13F3, 0x11AB, 0x10de, "GRID M6-2A"),
ENTRY(0x13F3, 0x11AC, 0x10de, "GRID M6-4A"),
ENTRY(0x13F3, 0x11AD, 0x10de, "GRID M6-8A"),
ENTRY(0x13F3, 0x12ED, 0x10de, "GRID M6-2B4"),
ENTRY(0x13F3, 0x1338, 0x10de, "GRID M6-1B4"),
ENTRY(0x15F7, 0x1265, 0x10de, "GRID P100C-1B"),
ENTRY(0x15F7, 0x1266, 0x10de, "GRID P100C-1Q"),
ENTRY(0x15F7, 0x1267, 0x10de, "GRID P100C-2Q"),
ENTRY(0x15F7, 0x1268, 0x10de, "GRID P100C-4Q"),
ENTRY(0x15F7, 0x1269, 0x10de, "GRID P100C-6Q"),
ENTRY(0x15F7, 0x126A, 0x10de, "GRID P100C-12Q"),
ENTRY(0x15F7, 0x126B, 0x10de, "GRID P100C-1A"),
ENTRY(0x15F7, 0x126C, 0x10de, "GRID P100C-2A"),
ENTRY(0x15F7, 0x126D, 0x10de, "GRID P100C-4A"),
ENTRY(0x15F7, 0x126E, 0x10de, "GRID P100C-6A"),
ENTRY(0x15F7, 0x126F, 0x10de, "GRID P100C-12A"),
ENTRY(0x15F7, 0x128D, 0x10de, "GRID P100C-2B"),
ENTRY(0x15F7, 0x12F4, 0x10de, "GRID P100C-2B4"),
ENTRY(0x15F7, 0x133F, 0x10de, "GRID P100C-1B4"),
ENTRY(0x15F7, 0x137D, 0x10de, "GRID P100C-12C"),
ENTRY(0x15F7, 0x138C, 0x10de, "GRID P100C-4C"),
ENTRY(0x15F7, 0x138D, 0x10de, "GRID P100C-6C"),
ENTRY(0x15F8, 0x1221, 0x10de, "GRID P100-1B"),
ENTRY(0x15F8, 0x1222, 0x10de, "GRID P100-1Q"),
ENTRY(0x15F8, 0x1223, 0x10de, "GRID P100-2Q"),
ENTRY(0x15F8, 0x1224, 0x10de, "GRID P100-4Q"),
ENTRY(0x15F8, 0x1225, 0x10de, "GRID P100-8Q"),
ENTRY(0x15F8, 0x1226, 0x10de, "GRID P100-16Q"),
ENTRY(0x15F8, 0x1227, 0x10de, "GRID P100-1A"),
ENTRY(0x15F8, 0x1228, 0x10de, "GRID P100-2A"),
ENTRY(0x15F8, 0x1229, 0x10de, "GRID P100-4A"),
ENTRY(0x15F8, 0x122A, 0x10de, "GRID P100-8A"),
ENTRY(0x15F8, 0x122B, 0x10de, "GRID P100-16A"),
ENTRY(0x15F8, 0x128C, 0x10de, "GRID P100-2B"),
ENTRY(0x15F8, 0x12F2, 0x10de, "GRID P100-2B4"),
ENTRY(0x15F8, 0x133D, 0x10de, "GRID P100-1B4"),
ENTRY(0x15F8, 0x137C, 0x10de, "GRID P100-16C"),
ENTRY(0x15F8, 0x138A, 0x10de, "GRID P100-4C"),
ENTRY(0x15F8, 0x138B, 0x10de, "GRID P100-8C"),
ENTRY(0x15F9, 0x122C, 0x10de, "GRID P100X-1B"),
ENTRY(0x15F9, 0x122D, 0x10de, "GRID P100X-1Q"),
ENTRY(0x15F9, 0x122E, 0x10de, "GRID P100X-2Q"),
ENTRY(0x15F9, 0x122F, 0x10de, "GRID P100X-4Q"),
ENTRY(0x15F9, 0x1230, 0x10de, "GRID P100X-8Q"),
ENTRY(0x15F9, 0x1231, 0x10de, "GRID P100X-16Q"),
ENTRY(0x15F9, 0x1232, 0x10de, "GRID P100X-1A"),
ENTRY(0x15F9, 0x1233, 0x10de, "GRID P100X-2A"),
ENTRY(0x15F9, 0x1234, 0x10de, "GRID P100X-4A"),
ENTRY(0x15F9, 0x1235, 0x10de, "GRID P100X-8A"),
ENTRY(0x15F9, 0x1236, 0x10de, "GRID P100X-16A"),
ENTRY(0x15F9, 0x128B, 0x10de, "GRID P100X-2B"),
ENTRY(0x15F9, 0x12F3, 0x10de, "GRID P100X-2B4"),
ENTRY(0x15F9, 0x133E, 0x10de, "GRID P100X-1B4"),
ENTRY(0x15F9, 0x137B, 0x10de, "GRID P100X-16C"),
ENTRY(0x15F9, 0x1388, 0x10de, "GRID P100X-4C"),
ENTRY(0x15F9, 0x1389, 0x10de, "GRID P100X-8C"),
ENTRY(0x1B38, 0x11E7, 0x10de, "GRID P40-1B"),
ENTRY(0x1B38, 0x11E8, 0x10de, "GRID P40-1Q"),
ENTRY(0x1B38, 0x11E9, 0x10de, "GRID P40-2Q"),
ENTRY(0x1B38, 0x11EA, 0x10de, "GRID P40-3Q"),
ENTRY(0x1B38, 0x11EB, 0x10de, "GRID P40-4Q"),
ENTRY(0x1B38, 0x11EC, 0x10de, "GRID P40-6Q"),
ENTRY(0x1B38, 0x11ED, 0x10de, "GRID P40-8Q"),
ENTRY(0x1B38, 0x11EE, 0x10de, "GRID P40-12Q"),
ENTRY(0x1B38, 0x11EF, 0x10de, "GRID P40-24Q"),
ENTRY(0x1B38, 0x11F0, 0x10de, "GRID P40-1A"),
ENTRY(0x1B38, 0x11F1, 0x10de, "GRID P40-2A"),
ENTRY(0x1B38, 0x11F2, 0x10de, "GRID P40-3A"),
ENTRY(0x1B38, 0x11F3, 0x10de, "GRID P40-4A"),
ENTRY(0x1B38, 0x11F4, 0x10de, "GRID P40-6A"),
ENTRY(0x1B38, 0x11F5, 0x10de, "GRID P40-8A"),
ENTRY(0x1B38, 0x11F6, 0x10de, "GRID P40-12A"),
ENTRY(0x1B38, 0x11F7, 0x10de, "GRID P40-24A"),
ENTRY(0x1B38, 0x1287, 0x10de, "GRID P40-2B"),
ENTRY(0x1B38, 0x12B1, 0x10de, "GeForce GTX P40-24"),
ENTRY(0x1B38, 0x12B2, 0x10de, "GeForce GTX P40-12"),
ENTRY(0x1B38, 0x12B3, 0x10de, "GeForce GTX P40-6"),
ENTRY(0x1B38, 0x12EF, 0x10de, "GRID P40-2B4"),
ENTRY(0x1B38, 0x133A, 0x10de, "GRID P40-1B4"),
ENTRY(0x1B38, 0x137E, 0x10de, "GRID P40-24C"),
ENTRY(0x1B38, 0x1381, 0x10de, "GRID P40-4C"),
ENTRY(0x1B38, 0x1382, 0x10de, "GRID P40-6C"),
ENTRY(0x1B38, 0x1383, 0x10de, "GRID P40-8C"),
ENTRY(0x1B38, 0x1384, 0x10de, "GRID P40-12C"),
ENTRY(0x1B38, 0x13B0, 0x10de, "GRID GTX P40-6"),
ENTRY(0x1B38, 0x13B1, 0x10de, "GRID GTX P40-12"),
ENTRY(0x1B38, 0x13B2, 0x10de, "GRID GTX P40-24"),
ENTRY(0x1B38, 0x13D0, 0x10de, "GRID GTX P40-8"),
ENTRY(0x1BB3, 0x1203, 0x10de, "GRID P4-1B"),
ENTRY(0x1BB3, 0x1204, 0x10de, "GRID P4-1Q"),
ENTRY(0x1BB3, 0x1205, 0x10de, "GRID P4-2Q"),
ENTRY(0x1BB3, 0x1206, 0x10de, "GRID P4-4Q"),
ENTRY(0x1BB3, 0x1207, 0x10de, "GRID P4-8Q"),
ENTRY(0x1BB3, 0x1208, 0x10de, "GRID P4-1A"),
ENTRY(0x1BB3, 0x1209, 0x10de, "GRID P4-2A"),
ENTRY(0x1BB3, 0x120A, 0x10de, "GRID P4-4A"),
ENTRY(0x1BB3, 0x120B, 0x10de, "GRID P4-8A"),
ENTRY(0x1BB3, 0x1288, 0x10de, "GRID P4-2B"),
ENTRY(0x1BB3, 0x12F1, 0x10de, "GRID P4-2B4"),
ENTRY(0x1BB3, 0x133C, 0x10de, "GRID P4-1B4"),
ENTRY(0x1BB3, 0x136D, 0x10de, "GRID GTX P4-2"),
ENTRY(0x1BB3, 0x136E, 0x10de, "GRID GTX P4-4"),
ENTRY(0x1BB3, 0x136F, 0x10de, "GRID GTX P4-8"),
ENTRY(0x1BB3, 0x1380, 0x10de, "GRID P4-8C"),
ENTRY(0x1BB3, 0x1385, 0x10de, "GRID P4-4C"),
ENTRY(0x1BB4, 0x11F8, 0x10de, "GRID P6-1B"),
ENTRY(0x1BB4, 0x11F9, 0x10de, "GRID P6-1Q"),
ENTRY(0x1BB4, 0x11FA, 0x10de, "GRID P6-2Q"),
ENTRY(0x1BB4, 0x11FB, 0x10de, "GRID P6-4Q"),
ENTRY(0x1BB4, 0x11FC, 0x10de, "GRID P6-8Q"),
ENTRY(0x1BB4, 0x11FD, 0x10de, "GRID P6-16Q"),
ENTRY(0x1BB4, 0x11FE, 0x10de, "GRID P6-1A"),
ENTRY(0x1BB4, 0x11FF, 0x10de, "GRID P6-2A"),
ENTRY(0x1BB4, 0x1200, 0x10de, "GRID P6-4A"),
ENTRY(0x1BB4, 0x1201, 0x10de, "GRID P6-8A"),
ENTRY(0x1BB4, 0x1202, 0x10de, "GRID P6-16A"),
ENTRY(0x1BB4, 0x1289, 0x10de, "GRID P6-2B"),
ENTRY(0x1BB4, 0x12F0, 0x10de, "GRID P6-2B4"),
ENTRY(0x1BB4, 0x133B, 0x10de, "GRID P6-1B4"),
ENTRY(0x1BB4, 0x137F, 0x10de, "GRID P6-16C"),
ENTRY(0x1BB4, 0x1386, 0x10de, "GRID P6-4C"),
ENTRY(0x1BB4, 0x1387, 0x10de, "GRID P6-8C"),
ENTRY(0x1DB1, 0x1259, 0x10de, "GRID V100X-1B"),
ENTRY(0x1DB1, 0x125A, 0x10de, "GRID V100X-1Q"),
ENTRY(0x1DB1, 0x125B, 0x10de, "GRID V100X-2Q"),
@ -813,6 +668,20 @@ ENTRY(0x2324, 0x18E0, 0x10de, "NVIDIA H800XM-16C"),
ENTRY(0x2324, 0x18E1, 0x10de, "NVIDIA H800XM-20C"),
ENTRY(0x2324, 0x18E2, 0x10de, "NVIDIA H800XM-40C"),
ENTRY(0x2324, 0x18E3, 0x10de, "NVIDIA H800XM-80C"),
ENTRY(0x2329, 0x2028, 0x10de, "NVIDIA H20-1-12CME"),
ENTRY(0x2329, 0x2029, 0x10de, "NVIDIA H20-1-12C"),
ENTRY(0x2329, 0x202A, 0x10de, "NVIDIA H20-1-24C"),
ENTRY(0x2329, 0x202B, 0x10de, "NVIDIA H20-2-24C"),
ENTRY(0x2329, 0x202C, 0x10de, "NVIDIA H20-3-48C"),
ENTRY(0x2329, 0x202D, 0x10de, "NVIDIA H20-4-48C"),
ENTRY(0x2329, 0x202E, 0x10de, "NVIDIA H20-7-96C"),
ENTRY(0x2329, 0x202F, 0x10de, "NVIDIA H20-4C"),
ENTRY(0x2329, 0x2030, 0x10de, "NVIDIA H20-6C"),
ENTRY(0x2329, 0x2031, 0x10de, "NVIDIA H20-12C"),
ENTRY(0x2329, 0x2032, 0x10de, "NVIDIA H20-16C"),
ENTRY(0x2329, 0x2033, 0x10de, "NVIDIA H20-24C"),
ENTRY(0x2329, 0x2034, 0x10de, "NVIDIA H20-48C"),
ENTRY(0x2329, 0x2035, 0x10de, "NVIDIA H20-96C"),
ENTRY(0x2330, 0x187A, 0x10de, "NVIDIA H100XM-1-10CME"),
ENTRY(0x2330, 0x187B, 0x10de, "NVIDIA H100XM-1-10C"),
ENTRY(0x2330, 0x187C, 0x10de, "NVIDIA H100XM-1-20C"),
@ -883,14 +752,14 @@ ENTRY(0x233A, 0x186B, 0x10de, "NVIDIA H800L-15C"),
ENTRY(0x233A, 0x186C, 0x10de, "NVIDIA H800L-23C"),
ENTRY(0x233A, 0x186D, 0x10de, "NVIDIA H800L-47C"),
ENTRY(0x233A, 0x186E, 0x10de, "NVIDIA H800L-94C"),
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA H100GL-1-12CME"),
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA H100GL-1-12C"),
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA H100GL-1-24C"),
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA H100GL-2-24C"),
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA H100GL-3-48C"),
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA H100GL-4-48C"),
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA H100GL-7-96C"),
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA H100GL-96C"),
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA GH200-1-12CME"),
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA GH200-1-12C"),
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA GH200-1-24C"),
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA GH200-2-24C"),
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA GH200-3-48C"),
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA GH200-4-48C"),
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA GH200-7-96C"),
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA GH200-96C"),
ENTRY(0x25B6, 0x159D, 0x10de, "NVIDIA A16-1B"),
ENTRY(0x25B6, 0x159E, 0x10de, "NVIDIA A16-2B"),
ENTRY(0x25B6, 0x159F, 0x10de, "NVIDIA A16-1Q"),
@ -987,6 +856,45 @@ ENTRY(0x26B2, 0x1835, 0x10de, "NVIDIA RTX5000-Ada-4C"),
ENTRY(0x26B2, 0x1836, 0x10de, "NVIDIA RTX5000-Ada-8C"),
ENTRY(0x26B2, 0x1837, 0x10de, "NVIDIA RTX5000-Ada-16C"),
ENTRY(0x26B2, 0x1838, 0x10de, "NVIDIA RTX5000-Ada-32C"),
ENTRY(0x26B3, 0x1958, 0x10de, "NVIDIA RTX 5880-Ada-1B"),
ENTRY(0x26B3, 0x1959, 0x10de, "NVIDIA RTX 5880-Ada-2B"),
ENTRY(0x26B3, 0x195A, 0x10de, "NVIDIA RTX 5880-Ada-1Q"),
ENTRY(0x26B3, 0x195B, 0x10de, "NVIDIA RTX 5880-Ada-2Q"),
ENTRY(0x26B3, 0x195C, 0x10de, "NVIDIA RTX 5880-Ada-3Q"),
ENTRY(0x26B3, 0x195D, 0x10de, "NVIDIA RTX 5880-Ada-4Q"),
ENTRY(0x26B3, 0x195E, 0x10de, "NVIDIA RTX 5880-Ada-6Q"),
ENTRY(0x26B3, 0x195F, 0x10de, "NVIDIA RTX 5880-Ada-8Q"),
ENTRY(0x26B3, 0x1960, 0x10de, "NVIDIA RTX 5880-Ada-12Q"),
ENTRY(0x26B3, 0x1961, 0x10de, "NVIDIA RTX 5880-Ada-16Q"),
ENTRY(0x26B3, 0x1962, 0x10de, "NVIDIA RTX 5880-Ada-24Q"),
ENTRY(0x26B3, 0x1963, 0x10de, "NVIDIA RTX 5880-Ada-48Q"),
ENTRY(0x26B3, 0x1964, 0x10de, "NVIDIA RTX 5880-Ada-1A"),
ENTRY(0x26B3, 0x1965, 0x10de, "NVIDIA RTX 5880-Ada-2A"),
ENTRY(0x26B3, 0x1966, 0x10de, "NVIDIA RTX 5880-Ada-3A"),
ENTRY(0x26B3, 0x1967, 0x10de, "NVIDIA RTX 5880-Ada-4A"),
ENTRY(0x26B3, 0x1968, 0x10de, "NVIDIA RTX 5880-Ada-6A"),
ENTRY(0x26B3, 0x1969, 0x10de, "NVIDIA RTX 5880-Ada-8A"),
ENTRY(0x26B3, 0x196A, 0x10de, "NVIDIA RTX 5880-Ada-12A"),
ENTRY(0x26B3, 0x196B, 0x10de, "NVIDIA RTX 5880-Ada-16A"),
ENTRY(0x26B3, 0x196C, 0x10de, "NVIDIA RTX 5880-Ada-24A"),
ENTRY(0x26B3, 0x196D, 0x10de, "NVIDIA RTX 5880-Ada-48A"),
ENTRY(0x26B3, 0x196E, 0x10de, "NVIDIA RTX 5880-Ada-1"),
ENTRY(0x26B3, 0x196F, 0x10de, "NVIDIA RTX 5880-Ada-2"),
ENTRY(0x26B3, 0x1970, 0x10de, "NVIDIA RTX 5880-Ada-3"),
ENTRY(0x26B3, 0x1971, 0x10de, "NVIDIA RTX 5880-Ada-4"),
ENTRY(0x26B3, 0x1972, 0x10de, "NVIDIA RTX 5880-Ada-6"),
ENTRY(0x26B3, 0x1973, 0x10de, "NVIDIA RTX 5880-Ada-8"),
ENTRY(0x26B3, 0x1974, 0x10de, "NVIDIA RTX 5880-Ada-12"),
ENTRY(0x26B3, 0x1975, 0x10de, "NVIDIA RTX 5880-Ada-16"),
ENTRY(0x26B3, 0x1976, 0x10de, "NVIDIA RTX 5880-Ada-24"),
ENTRY(0x26B3, 0x1977, 0x10de, "NVIDIA RTX 5880-Ada-48"),
ENTRY(0x26B3, 0x1978, 0x10de, "NVIDIA RTX 5880-Ada-4C"),
ENTRY(0x26B3, 0x1979, 0x10de, "NVIDIA RTX 5880-Ada-6C"),
ENTRY(0x26B3, 0x197A, 0x10de, "NVIDIA RTX 5880-Ada-8C"),
ENTRY(0x26B3, 0x197B, 0x10de, "NVIDIA RTX 5880-Ada-12C"),
ENTRY(0x26B3, 0x197C, 0x10de, "NVIDIA RTX 5880-Ada-16C"),
ENTRY(0x26B3, 0x197D, 0x10de, "NVIDIA RTX 5880-Ada-24C"),
ENTRY(0x26B3, 0x197E, 0x10de, "NVIDIA RTX 5880-Ada-48C"),
ENTRY(0x26B5, 0x176D, 0x10de, "NVIDIA L40-1B"),
ENTRY(0x26B5, 0x176E, 0x10de, "NVIDIA L40-2B"),
ENTRY(0x26B5, 0x176F, 0x10de, "NVIDIA L40-1Q"),
@ -1102,6 +1010,78 @@ ENTRY(0x26B9, 0x18AE, 0x10de, "NVIDIA L40S-12C"),
ENTRY(0x26B9, 0x18AF, 0x10de, "NVIDIA L40S-16C"),
ENTRY(0x26B9, 0x18B0, 0x10de, "NVIDIA L40S-24C"),
ENTRY(0x26B9, 0x18B1, 0x10de, "NVIDIA L40S-48C"),
ENTRY(0x26BA, 0x1909, 0x10de, "NVIDIA L20-1B"),
ENTRY(0x26BA, 0x190A, 0x10de, "NVIDIA L20-2B"),
ENTRY(0x26BA, 0x190B, 0x10de, "NVIDIA L20-1Q"),
ENTRY(0x26BA, 0x190C, 0x10de, "NVIDIA L20-2Q"),
ENTRY(0x26BA, 0x190D, 0x10de, "NVIDIA L20-3Q"),
ENTRY(0x26BA, 0x190E, 0x10de, "NVIDIA L20-4Q"),
ENTRY(0x26BA, 0x190F, 0x10de, "NVIDIA L20-6Q"),
ENTRY(0x26BA, 0x1910, 0x10de, "NVIDIA L20-8Q"),
ENTRY(0x26BA, 0x1911, 0x10de, "NVIDIA L20-12Q"),
ENTRY(0x26BA, 0x1912, 0x10de, "NVIDIA L20-16Q"),
ENTRY(0x26BA, 0x1913, 0x10de, "NVIDIA L20-24Q"),
ENTRY(0x26BA, 0x1914, 0x10de, "NVIDIA L20-48Q"),
ENTRY(0x26BA, 0x1915, 0x10de, "NVIDIA L20-1A"),
ENTRY(0x26BA, 0x1916, 0x10de, "NVIDIA L20-2A"),
ENTRY(0x26BA, 0x1917, 0x10de, "NVIDIA L20-3A"),
ENTRY(0x26BA, 0x1918, 0x10de, "NVIDIA L20-4A"),
ENTRY(0x26BA, 0x1919, 0x10de, "NVIDIA L20-6A"),
ENTRY(0x26BA, 0x191A, 0x10de, "NVIDIA L20-8A"),
ENTRY(0x26BA, 0x191B, 0x10de, "NVIDIA L20-12A"),
ENTRY(0x26BA, 0x191C, 0x10de, "NVIDIA L20-16A"),
ENTRY(0x26BA, 0x191D, 0x10de, "NVIDIA L20-24A"),
ENTRY(0x26BA, 0x191E, 0x10de, "NVIDIA L20-48A"),
ENTRY(0x26BA, 0x191F, 0x10de, "NVIDIA GeForce RTX 3050"),
ENTRY(0x26BA, 0x1920, 0x10de, "NVIDIA GeForce RTX 3060"),
ENTRY(0x26BA, 0x1921, 0x10de, "NVIDIA L20-1"),
ENTRY(0x26BA, 0x1922, 0x10de, "NVIDIA L20-2"),
ENTRY(0x26BA, 0x1923, 0x10de, "NVIDIA L20-3"),
ENTRY(0x26BA, 0x1924, 0x10de, "NVIDIA L20-4"),
ENTRY(0x26BA, 0x1925, 0x10de, "NVIDIA L20-6"),
ENTRY(0x26BA, 0x1926, 0x10de, "NVIDIA L20-8"),
ENTRY(0x26BA, 0x1927, 0x10de, "NVIDIA L20-12"),
ENTRY(0x26BA, 0x1928, 0x10de, "NVIDIA L20-16"),
ENTRY(0x26BA, 0x1929, 0x10de, "NVIDIA L20-24"),
ENTRY(0x26BA, 0x192A, 0x10de, "NVIDIA L20-48"),
ENTRY(0x26BA, 0x192B, 0x10de, "NVIDIA L20-4C"),
ENTRY(0x26BA, 0x192C, 0x10de, "NVIDIA L20-6C"),
ENTRY(0x26BA, 0x192D, 0x10de, "NVIDIA L20-8C"),
ENTRY(0x26BA, 0x192E, 0x10de, "NVIDIA L20-12C"),
ENTRY(0x26BA, 0x192F, 0x10de, "NVIDIA L20-16C"),
ENTRY(0x26BA, 0x1930, 0x10de, "NVIDIA L20-24C"),
ENTRY(0x26BA, 0x1931, 0x10de, "NVIDIA L20-48C"),
ENTRY(0x27B6, 0x1938, 0x10de, "NVIDIA L2-1B"),
ENTRY(0x27B6, 0x1939, 0x10de, "NVIDIA L2-2B"),
ENTRY(0x27B6, 0x193A, 0x10de, "NVIDIA L2-1Q"),
ENTRY(0x27B6, 0x193B, 0x10de, "NVIDIA L2-2Q"),
ENTRY(0x27B6, 0x193C, 0x10de, "NVIDIA L2-3Q"),
ENTRY(0x27B6, 0x193D, 0x10de, "NVIDIA L2-4Q"),
ENTRY(0x27B6, 0x193E, 0x10de, "NVIDIA L2-6Q"),
ENTRY(0x27B6, 0x193F, 0x10de, "NVIDIA L2-8Q"),
ENTRY(0x27B6, 0x1940, 0x10de, "NVIDIA L2-12Q"),
ENTRY(0x27B6, 0x1941, 0x10de, "NVIDIA L2-24Q"),
ENTRY(0x27B6, 0x1942, 0x10de, "NVIDIA L2-1A"),
ENTRY(0x27B6, 0x1943, 0x10de, "NVIDIA L2-2A"),
ENTRY(0x27B6, 0x1944, 0x10de, "NVIDIA L2-3A"),
ENTRY(0x27B6, 0x1945, 0x10de, "NVIDIA L2-4A"),
ENTRY(0x27B6, 0x1946, 0x10de, "NVIDIA L2-6A"),
ENTRY(0x27B6, 0x1947, 0x10de, "NVIDIA L2-8A"),
ENTRY(0x27B6, 0x1948, 0x10de, "NVIDIA L2-12A"),
ENTRY(0x27B6, 0x1949, 0x10de, "NVIDIA L2-24A"),
ENTRY(0x27B6, 0x194A, 0x10de, "NVIDIA L2-1"),
ENTRY(0x27B6, 0x194B, 0x10de, "NVIDIA L2-2"),
ENTRY(0x27B6, 0x194C, 0x10de, "NVIDIA L2-3"),
ENTRY(0x27B6, 0x194D, 0x10de, "NVIDIA L2-4"),
ENTRY(0x27B6, 0x194E, 0x10de, "NVIDIA L2-6"),
ENTRY(0x27B6, 0x194F, 0x10de, "NVIDIA L2-8"),
ENTRY(0x27B6, 0x1950, 0x10de, "NVIDIA L2-12"),
ENTRY(0x27B6, 0x1951, 0x10de, "NVIDIA L2-24"),
ENTRY(0x27B6, 0x1952, 0x10de, "NVIDIA L2-4C"),
ENTRY(0x27B6, 0x1953, 0x10de, "NVIDIA L2-6C"),
ENTRY(0x27B6, 0x1954, 0x10de, "NVIDIA L2-8C"),
ENTRY(0x27B6, 0x1955, 0x10de, "NVIDIA L2-12C"),
ENTRY(0x27B6, 0x1956, 0x10de, "NVIDIA L2-24C"),
ENTRY(0x27B8, 0x172F, 0x10de, "NVIDIA L4-1B"),
ENTRY(0x27B8, 0x1730, 0x10de, "NVIDIA L4-2B"),
ENTRY(0x27B8, 0x1731, 0x10de, "NVIDIA L4-1Q"),

View File

@ -18,9 +18,9 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
{ 0x20FD, 0x17F8, 0x20F5, 0x0 },
{ 0x2324, 0x17A8, 0x2324, 0x17A6 },
{ 0x2329, 0x198C, 0x2329, 0x198B },
{ 0x2330, 0x16C0, 0x2330, 0x16C1 },
{ 0x2336, 0x16C2, 0x2330, 0x16C1 },
{ 0x2342, 0x1809, 0x2342, 0x1805 },
};
for (NvU32 i = 0; i < (sizeof(vgpu_aliases) / sizeof(struct vgpu_alias_details)); ++i) {
@ -136,6 +136,13 @@ static const struct {
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1280}, // NVIDIA H800XM-3-40C
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1281}, // NVIDIA H800XM-4-40C
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1282}, // NVIDIA H800XM-7-80C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1397}, // NVIDIA H20-1-12CME
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1398}, // NVIDIA H20-1-12C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1399}, // NVIDIA H20-1-24C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1400}, // NVIDIA H20-2-24C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1401}, // NVIDIA H20-3-48C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1402}, // NVIDIA H20-4-48C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1403}, // NVIDIA H20-7-96C
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
{0x233610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1131}, // NVIDIA H100XM-1-10C
@ -178,13 +185,13 @@ static const struct {
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1079}, // NVIDIA H800L-3-47C
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1080}, // NVIDIA H800L-4-47C
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1081}, // NVIDIA H800L-7-94C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA H100GL-1-12CME
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA H100GL-1-12C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA H100GL-1-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA H100GL-2-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA H100GL-3-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA H100GL-4-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA H100GL-7-96C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA GH200-1-12CME
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA GH200-1-12C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA GH200-1-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA GH200-2-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA GH200-3-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA GH200-4-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA GH200-7-96C
};
#endif // GENERATE_vgpuSmcTypeIdMappings

View File

@ -2397,17 +2397,19 @@ NvBool nvHdmiFrlQueryConfig(
NvU8 *pHdmiFrlBpc,
NVDscInfoEvoRec *pDscInfo)
{
// Try first with 10 BPC
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
pModeTimings,
pHwTimings,
b2Heads1Or,
pValidationParams,
HDMI_BPC10,
pConfig,
pHdmiFrlBpc,
pDscInfo)) {
return TRUE;
if (nvDpyIsHdmiDepth30Evo(pDpyEvo)) {
// Try first with 10 BPC
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
pModeTimings,
pHwTimings,
b2Heads1Or,
pValidationParams,
HDMI_BPC10,
pConfig,
pHdmiFrlBpc,
pDscInfo)) {
return TRUE;
}
}
// Try again with 8 BPC

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -223,7 +223,8 @@ typedef struct
#define GSP_FW_HEAP_FREE_LIST_MAGIC 0x4845415046524545ULL
#define GSP_FW_FLAGS 8:0
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
#define GSP_FW_FLAGS 8:0
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
#define GSP_FW_FLAGS_RECOVERY_MARGIN_PRESENT NVBIT(1)
#endif // GSP_FW_WPR_META_H_

View File

@ -0,0 +1,160 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*!
* @file rmspdmrsakeys.h
* @brief SPDM Interfaces - RSA keys
*/
#ifndef _RM_SPDM_RSA_KEYS_H_
#define _RM_SPDM_RSA_KEYS_H_
//
// Note !!
// All key components must be stored in big-endian (network) order.
// If any of these key components change, the GSP/RM code must be rebuilt as well.
//
#ifndef USE_MBEDTLS
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
{
0xcd, 0x65, 0xd2, 0xca, 0x74, 0xd1, 0x76, 0x76, 0x9f, 0x18, 0x23, 0x64, 0x55, 0x84, 0x98, 0x60,
0x71, 0xeb, 0x42, 0x6d, 0xa4, 0x98, 0xf9, 0x92, 0xea, 0x83, 0x5b, 0x9b, 0xe2, 0x66, 0x8b, 0x43,
0x14, 0x4d, 0x5a, 0xb8, 0xdb, 0x68, 0x2b, 0xfa, 0x5f, 0xdf, 0x7f, 0xf4, 0xfd, 0x5e, 0x42, 0x34,
0x09, 0x98, 0xa1, 0x21, 0x98, 0x4c, 0x8d, 0xbc, 0x99, 0xdb, 0xea, 0xc1, 0xe3, 0x42, 0xe6, 0x67,
0x26, 0x86, 0x2c, 0xd0, 0xdb, 0xf3, 0x9c, 0x12, 0xad, 0xb3, 0x82, 0x93, 0x9c, 0xb9, 0xae, 0x98,
0x82, 0xeb, 0x59, 0xb6, 0x5c, 0x09, 0x9f, 0xa6, 0x15, 0x30, 0xa0, 0xc6, 0x77, 0xd5, 0xae, 0xa2,
0x91, 0x65, 0x24, 0xc3, 0x7d, 0x9b, 0xa4, 0x2c, 0x31, 0x73, 0x41, 0x26, 0x72, 0xe7, 0x2f, 0xb8,
0x60, 0xd8, 0xce, 0xb8, 0xd8, 0x4b, 0x90, 0x6c, 0xa3, 0x19, 0x7e, 0x2b, 0xd5, 0xf6, 0x05, 0x8a,
0x2b, 0xb9, 0x9e, 0x27, 0xba, 0x2e, 0x16, 0x81, 0x9a, 0x9e, 0xf5, 0x6c, 0x38, 0x0a, 0x01, 0xea,
0xd6, 0xe1, 0xa4, 0x83, 0x76, 0xd0, 0x68, 0xbb, 0x90, 0x63, 0xce, 0x1c, 0x8c, 0x6f, 0x0f, 0x6b,
0x65, 0x5b, 0x12, 0xe2, 0x92, 0x60, 0x79, 0x88, 0x99, 0x5e, 0x17, 0x89, 0x4d, 0x54, 0xb1, 0x87,
0x9c, 0xe6, 0x6d, 0x3c, 0x56, 0x1c, 0x3a, 0x3e, 0xb9, 0x16, 0x2c, 0xc8, 0xda, 0x1a, 0xfb, 0x5c,
0xd7, 0x16, 0x3c, 0x74, 0xda, 0x78, 0xf1, 0x53, 0xd7, 0x66, 0x97, 0x8a, 0x57, 0x0b, 0x86, 0x57,
0x2c, 0x45, 0xa1, 0x5e, 0xae, 0x39, 0x39, 0xde, 0xe0, 0x40, 0x6f, 0xdb, 0x4c, 0xd8, 0xc1, 0x8c,
0xdb, 0xce, 0xea, 0x05, 0xc0, 0xbc, 0x89, 0x72, 0x15, 0xfd, 0xbf, 0xb2, 0xb7, 0xf2, 0x5d, 0x05,
0xdd, 0x2b, 0x53, 0xa4, 0x03, 0x1e, 0x11, 0x67, 0xa7, 0x0f, 0x87, 0xfb, 0x57, 0x48, 0x91, 0xc8,
0x02, 0xb7, 0x46, 0x3d, 0x82, 0xcd, 0x06, 0x4a, 0x79, 0x0f, 0xa5, 0x8a, 0xac, 0xfb, 0xb7, 0xc2,
0xf0, 0x95, 0x19, 0x4c, 0x78, 0x7a, 0xc7, 0xd7, 0x70, 0xee, 0x6e, 0x59, 0xaf, 0x51, 0x9b, 0x11,
0x03, 0xd3, 0x56, 0xb3, 0x05, 0x5a, 0xbb, 0x1f, 0xbd, 0xc2, 0x0e, 0x89, 0x77, 0xb4, 0xc1, 0x02,
0xf9, 0x97, 0x56, 0x07, 0x2e, 0x4e, 0x2d, 0x01, 0x73, 0x89, 0x7d, 0xf3, 0xc9, 0x8c, 0x88, 0x2c,
0x79, 0xd9, 0x47, 0x34, 0x9e, 0x32, 0x51, 0xd4, 0xa6, 0x7e, 0xd1, 0x08, 0xda, 0xc0, 0x76, 0x24,
0x8e, 0x25, 0x73, 0x14, 0x30, 0xd2, 0x17, 0x37, 0xbc, 0xe0, 0x3e, 0xa2, 0x47, 0xff, 0xe2, 0x4e,
0x9b, 0x31, 0x6c, 0xe6, 0x54, 0xaf, 0x62, 0x3a, 0xcd, 0xfa, 0x2f, 0xaf, 0x73, 0x2e, 0x73, 0x4a,
0x3a, 0x60, 0xa8, 0xa9, 0xfc, 0x77, 0xb2, 0x57, 0xdd, 0x3a, 0xfa, 0xce, 0x35, 0xc3, 0xea, 0xa9
};
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x00, 0x01, 0x00, 0x01};
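As a worked illustration of the big-endian ordering called out in the note above, the four exponent bytes {0x00, 0x01, 0x00, 0x01} decode to 0x00010001 = 65537, the conventional RSA public exponent; the helper below is ours and not part of this header:

static inline NvU32 be4_to_u32(const NvU8 b[4])
{
    return ((NvU32)b[0] << 24) | ((NvU32)b[1] << 16) |
           ((NvU32)b[2] << 8)  |  (NvU32)b[3];
}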
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
{
0x04, 0x85, 0xc0, 0x6d, 0x6a, 0xc3, 0x0d, 0xeb, 0xb0, 0xb7, 0x14, 0x58, 0x6a, 0x35, 0xa0, 0x31,
0x47, 0x70, 0xd9, 0xa6, 0x96, 0x60, 0x33, 0xe6, 0x93, 0x39, 0x4f, 0x34, 0x10, 0x79, 0x17, 0x89,
0xe3, 0x91, 0x8c, 0x74, 0xd3, 0x87, 0xe8, 0xa7, 0xfb, 0xa2, 0x6d, 0x2b, 0xd4, 0xc4, 0x55, 0x8a,
0xd5, 0xc7, 0x41, 0x8c, 0xfe, 0xd0, 0x78, 0xb2, 0x75, 0x64, 0xcd, 0x37, 0x75, 0xac, 0x8c, 0x6c,
0x2b, 0x01, 0xfb, 0x4c, 0xa1, 0xfb, 0x2c, 0x2d, 0x91, 0x1b, 0x89, 0xbd, 0x7c, 0x4e, 0xee, 0x54,
0x37, 0x55, 0xbd, 0x44, 0xf1, 0xa3, 0xd8, 0x9b, 0x0c, 0x8e, 0x64, 0x8b, 0xda, 0x29, 0x9f, 0x4d,
0xbf, 0x0b, 0xce, 0x12, 0x6b, 0xda, 0x98, 0x73, 0xcc, 0xa6, 0xf0, 0x01, 0x4b, 0xb8, 0x61, 0x69,
0x9a, 0xe0, 0x4b, 0x34, 0xf0, 0xb6, 0x41, 0x38, 0xd3, 0x22, 0x85, 0x8f, 0xab, 0x87, 0x5e, 0x39,
0xdb, 0x9a, 0x9b, 0xab, 0xde, 0x42, 0x93, 0x86, 0x49, 0x04, 0x44, 0xfd, 0x1c, 0x02, 0xc4, 0x66,
0x0a, 0x53, 0x2f, 0x8f, 0x21, 0x77, 0x97, 0x46, 0xc1, 0xf8, 0x20, 0x9a, 0xaa, 0x50, 0xeb, 0xb2,
0xfe, 0xa9, 0x51, 0xb2, 0x21, 0x6c, 0xf4, 0x60, 0x18, 0x98, 0x64, 0xc6, 0x46, 0x28, 0x9e, 0x3c,
0x11, 0x2d, 0x55, 0xac, 0x65, 0x6c, 0xfb, 0xaf, 0x53, 0xdd, 0xf4, 0x20, 0x7c, 0x04, 0xea, 0x11,
0xe8, 0x25, 0x65, 0x15, 0x32, 0x8e, 0x08, 0x84, 0xc7, 0x41, 0x13, 0x58, 0x73, 0x61, 0x63, 0x1f,
0xef, 0x30, 0x1d, 0x3d, 0x36, 0x08, 0x3a, 0xb2, 0xf7, 0x25, 0x56, 0xfc, 0x03, 0x78, 0xb5, 0x45,
0xf4, 0x60, 0x47, 0x95, 0x1c, 0x6a, 0x2b, 0xfa, 0xe3, 0x1c, 0x2f, 0x0d, 0x2a, 0x90, 0x43, 0x65,
0x09, 0xbe, 0x63, 0x71, 0x33, 0xd3, 0x29, 0x31, 0xd5, 0x29, 0x26, 0xa3, 0x15, 0xc2, 0x46, 0x70,
0xb4, 0x3e, 0x23, 0xaf, 0xb6, 0xfb, 0x87, 0x8f, 0x39, 0xf0, 0xd2, 0x3e, 0x35, 0x3c, 0xec, 0x7a,
0xd2, 0x0f, 0xd8, 0xa4, 0x0c, 0x19, 0xc8, 0xee, 0x47, 0x7c, 0x1e, 0xd6, 0x67, 0x31, 0xe2, 0x9d,
0xc0, 0x65, 0x64, 0x60, 0xe7, 0xd2, 0xeb, 0xe1, 0x02, 0xd5, 0x92, 0x7c, 0x51, 0xf1, 0x3b, 0x12,
0x00, 0x65, 0xfd, 0x2b, 0x13, 0x15, 0xfa, 0x6d, 0x99, 0x1d, 0xd3, 0x03, 0x77, 0xb1, 0xb0, 0xf0,
0x39, 0x7c, 0x27, 0x13, 0x30, 0xba, 0xff, 0x4d, 0x2e, 0xda, 0xe0, 0x37, 0xad, 0xf4, 0x49, 0x0a,
0xdd, 0x1e, 0x87, 0x8c, 0xc9, 0x6b, 0xf8, 0xc6, 0xb3, 0x05, 0xeb, 0x6c, 0x5f, 0x84, 0x64, 0x62,
0x1c, 0xf6, 0x04, 0x6f, 0xd7, 0xa9, 0xbc, 0x22, 0x97, 0xdb, 0x8d, 0xa5, 0xe1, 0x3a, 0x5c, 0x0d,
0x7b, 0x78, 0x25, 0x98, 0x04, 0x7f, 0x2b, 0x59, 0x5b, 0x7c, 0xf7, 0x73, 0x37, 0x7f, 0x92, 0x8d
};
#else
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
{
0xa9, 0xea, 0xc3, 0x35, 0xce, 0xfa, 0x3a, 0xdd, 0x57, 0xb2, 0x77, 0xfc, 0xa9, 0xa8, 0x60, 0x3a,
0x4a, 0x73, 0x2e, 0x73, 0xaf, 0x2f, 0xfa, 0xcd, 0x3a, 0x62, 0xaf, 0x54, 0xe6, 0x6c, 0x31, 0x9b,
0x4e, 0xe2, 0xff, 0x47, 0xa2, 0x3e, 0xe0, 0xbc, 0x37, 0x17, 0xd2, 0x30, 0x14, 0x73, 0x25, 0x8e,
0x24, 0x76, 0xc0, 0xda, 0x08, 0xd1, 0x7e, 0xa6, 0xd4, 0x51, 0x32, 0x9e, 0x34, 0x47, 0xd9, 0x79,
0x2c, 0x88, 0x8c, 0xc9, 0xf3, 0x7d, 0x89, 0x73, 0x01, 0x2d, 0x4e, 0x2e, 0x07, 0x56, 0x97, 0xf9,
0x02, 0xc1, 0xb4, 0x77, 0x89, 0x0e, 0xc2, 0xbd, 0x1f, 0xbb, 0x5a, 0x05, 0xb3, 0x56, 0xd3, 0x03,
0x11, 0x9b, 0x51, 0xaf, 0x59, 0x6e, 0xee, 0x70, 0xd7, 0xc7, 0x7a, 0x78, 0x4c, 0x19, 0x95, 0xf0,
0xc2, 0xb7, 0xfb, 0xac, 0x8a, 0xa5, 0x0f, 0x79, 0x4a, 0x06, 0xcd, 0x82, 0x3d, 0x46, 0xb7, 0x02,
0xc8, 0x91, 0x48, 0x57, 0xfb, 0x87, 0x0f, 0xa7, 0x67, 0x11, 0x1e, 0x03, 0xa4, 0x53, 0x2b, 0xdd,
0x05, 0x5d, 0xf2, 0xb7, 0xb2, 0xbf, 0xfd, 0x15, 0x72, 0x89, 0xbc, 0xc0, 0x05, 0xea, 0xce, 0xdb,
0x8c, 0xc1, 0xd8, 0x4c, 0xdb, 0x6f, 0x40, 0xe0, 0xde, 0x39, 0x39, 0xae, 0x5e, 0xa1, 0x45, 0x2c,
0x57, 0x86, 0x0b, 0x57, 0x8a, 0x97, 0x66, 0xd7, 0x53, 0xf1, 0x78, 0xda, 0x74, 0x3c, 0x16, 0xd7,
0x5c, 0xfb, 0x1a, 0xda, 0xc8, 0x2c, 0x16, 0xb9, 0x3e, 0x3a, 0x1c, 0x56, 0x3c, 0x6d, 0xe6, 0x9c,
0x87, 0xb1, 0x54, 0x4d, 0x89, 0x17, 0x5e, 0x99, 0x88, 0x79, 0x60, 0x92, 0xe2, 0x12, 0x5b, 0x65,
0x6b, 0x0f, 0x6f, 0x8c, 0x1c, 0xce, 0x63, 0x90, 0xbb, 0x68, 0xd0, 0x76, 0x83, 0xa4, 0xe1, 0xd6,
0xea, 0x01, 0x0a, 0x38, 0x6c, 0xf5, 0x9e, 0x9a, 0x81, 0x16, 0x2e, 0xba, 0x27, 0x9e, 0xb9, 0x2b,
0x8a, 0x05, 0xf6, 0xd5, 0x2b, 0x7e, 0x19, 0xa3, 0x6c, 0x90, 0x4b, 0xd8, 0xb8, 0xce, 0xd8, 0x60,
0xb8, 0x2f, 0xe7, 0x72, 0x26, 0x41, 0x73, 0x31, 0x2c, 0xa4, 0x9b, 0x7d, 0xc3, 0x24, 0x65, 0x91,
0xa2, 0xae, 0xd5, 0x77, 0xc6, 0xa0, 0x30, 0x15, 0xa6, 0x9f, 0x09, 0x5c, 0xb6, 0x59, 0xeb, 0x82,
0x98, 0xae, 0xb9, 0x9c, 0x93, 0x82, 0xb3, 0xad, 0x12, 0x9c, 0xf3, 0xdb, 0xd0, 0x2c, 0x86, 0x26,
0x67, 0xe6, 0x42, 0xe3, 0xc1, 0xea, 0xdb, 0x99, 0xbc, 0x8d, 0x4c, 0x98, 0x21, 0xa1, 0x98, 0x09,
0x34, 0x42, 0x5e, 0xfd, 0xf4, 0x7f, 0xdf, 0x5f, 0xfa, 0x2b, 0x68, 0xdb, 0xb8, 0x5a, 0x4d, 0x14,
0x43, 0x8b, 0x66, 0xe2, 0x9b, 0x5b, 0x83, 0xea, 0x92, 0xf9, 0x98, 0xa4, 0x6d, 0x42, 0xeb, 0x71,
0x60, 0x98, 0x84, 0x55, 0x64, 0x23, 0x18, 0x9f, 0x76, 0x76, 0xd1, 0x74, 0xca, 0xd2, 0x65, 0xcd,
0x00
};
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x01, 0x00, 0x01, 0x00};
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
{
0x8d, 0x92, 0x7f, 0x37, 0x73, 0xf7, 0x7c, 0x5b, 0x59, 0x2b, 0x7f, 0x04, 0x98, 0x25, 0x78, 0x7b,
0x0d, 0x5c, 0x3a, 0xe1, 0xa5, 0x8d, 0xdb, 0x97, 0x22, 0xbc, 0xa9, 0xd7, 0x6f, 0x04, 0xf6, 0x1c,
0x62, 0x64, 0x84, 0x5f, 0x6c, 0xeb, 0x05, 0xb3, 0xc6, 0xf8, 0x6b, 0xc9, 0x8c, 0x87, 0x1e, 0xdd,
0x0a, 0x49, 0xf4, 0xad, 0x37, 0xe0, 0xda, 0x2e, 0x4d, 0xff, 0xba, 0x30, 0x13, 0x27, 0x7c, 0x39,
0xf0, 0xb0, 0xb1, 0x77, 0x03, 0xd3, 0x1d, 0x99, 0x6d, 0xfa, 0x15, 0x13, 0x2b, 0xfd, 0x65, 0x00,
0x12, 0x3b, 0xf1, 0x51, 0x7c, 0x92, 0xd5, 0x02, 0xe1, 0xeb, 0xd2, 0xe7, 0x60, 0x64, 0x65, 0xc0,
0x9d, 0xe2, 0x31, 0x67, 0xd6, 0x1e, 0x7c, 0x47, 0xee, 0xc8, 0x19, 0x0c, 0xa4, 0xd8, 0x0f, 0xd2,
0x7a, 0xec, 0x3c, 0x35, 0x3e, 0xd2, 0xf0, 0x39, 0x8f, 0x87, 0xfb, 0xb6, 0xaf, 0x23, 0x3e, 0xb4,
0x70, 0x46, 0xc2, 0x15, 0xa3, 0x26, 0x29, 0xd5, 0x31, 0x29, 0xd3, 0x33, 0x71, 0x63, 0xbe, 0x09,
0x65, 0x43, 0x90, 0x2a, 0x0d, 0x2f, 0x1c, 0xe3, 0xfa, 0x2b, 0x6a, 0x1c, 0x95, 0x47, 0x60, 0xf4,
0x45, 0xb5, 0x78, 0x03, 0xfc, 0x56, 0x25, 0xf7, 0xb2, 0x3a, 0x08, 0x36, 0x3d, 0x1d, 0x30, 0xef,
0x1f, 0x63, 0x61, 0x73, 0x58, 0x13, 0x41, 0xc7, 0x84, 0x08, 0x8e, 0x32, 0x15, 0x65, 0x25, 0xe8,
0x11, 0xea, 0x04, 0x7c, 0x20, 0xf4, 0xdd, 0x53, 0xaf, 0xfb, 0x6c, 0x65, 0xac, 0x55, 0x2d, 0x11,
0x3c, 0x9e, 0x28, 0x46, 0xc6, 0x64, 0x98, 0x18, 0x60, 0xf4, 0x6c, 0x21, 0xb2, 0x51, 0xa9, 0xfe,
0xb2, 0xeb, 0x50, 0xaa, 0x9a, 0x20, 0xf8, 0xc1, 0x46, 0x97, 0x77, 0x21, 0x8f, 0x2f, 0x53, 0x0a,
0x66, 0xc4, 0x02, 0x1c, 0xfd, 0x44, 0x04, 0x49, 0x86, 0x93, 0x42, 0xde, 0xab, 0x9b, 0x9a, 0xdb,
0x39, 0x5e, 0x87, 0xab, 0x8f, 0x85, 0x22, 0xd3, 0x38, 0x41, 0xb6, 0xf0, 0x34, 0x4b, 0xe0, 0x9a,
0x69, 0x61, 0xb8, 0x4b, 0x01, 0xf0, 0xa6, 0xcc, 0x73, 0x98, 0xda, 0x6b, 0x12, 0xce, 0x0b, 0xbf,
0x4d, 0x9f, 0x29, 0xda, 0x8b, 0x64, 0x8e, 0x0c, 0x9b, 0xd8, 0xa3, 0xf1, 0x44, 0xbd, 0x55, 0x37,
0x54, 0xee, 0x4e, 0x7c, 0xbd, 0x89, 0x1b, 0x91, 0x2d, 0x2c, 0xfb, 0xa1, 0x4c, 0xfb, 0x01, 0x2b,
0x6c, 0x8c, 0xac, 0x75, 0x37, 0xcd, 0x64, 0x75, 0xb2, 0x78, 0xd0, 0xfe, 0x8c, 0x41, 0xc7, 0xd5,
0x8a, 0x55, 0xc4, 0xd4, 0x2b, 0x6d, 0xa2, 0xfb, 0xa7, 0xe8, 0x87, 0xd3, 0x74, 0x8c, 0x91, 0xe3,
0x89, 0x17, 0x79, 0x10, 0x34, 0x4f, 0x39, 0x93, 0xe6, 0x33, 0x60, 0x96, 0xa6, 0xd9, 0x70, 0x47,
0x31, 0xa0, 0x35, 0x6a, 0x58, 0x14, 0xb7, 0xb0, 0xeb, 0x0d, 0xc3, 0x6a, 0x6d, 0xc0, 0x85, 0x04
};
#endif
#endif // _RM_SPDM_RSA_KEYS_H_

View File

@ -839,6 +839,45 @@
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
/*
* Option: NVreg_ImexChannelCount
*
* Description:
*
* This option allows users to specify the number of IMEX (import/export)
* channels. Within an IMEX domain, the channels allow sharing memory
* securely in a multi-user environment using the CUDA driver's fabric handle
* based APIs.
*
* An IMEX domain is either an OS instance or a group of securely
* connected OS instances using the NVIDIA IMEX daemon. The option must
* be set to the same value on each OS instance within the IMEX domain.
*
* An IMEX channel is a logical entity that is represented by a /dev node.
* The IMEX channels are global resources within the IMEX domain. When
* exporter and importer CUDA processes have been granted access to the
* same IMEX channel, they can securely share memory.
*
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
* the related CUDA APIs will fail with an insufficient permission error until
* the /dev nodes are set up. The creation of these /dev nodes,
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
* administrator, where N is the minor number. The major number can be
* queried from /proc/devices.
*
* nvidia-modprobe CLI support is available to set up the /dev nodes.
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Possible values:
* 0 - Disable IMEX using CUDA driver's fabric handles.
* N - N IMEX channels will be enabled in the driver to facilitate N
* concurrent users. The default is 2048 channels, and the current
* maximum is 20 bits wide, the same as the Linux dev_t minor number limit.
*/
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
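For illustration only, the option is typically set through a modprobe configuration entry; the exact file under /etc/modprobe.d/ and the value shown are examples, not requirements of this header:

    options nvidia NVreg_ImexChannelCount=2048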
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
/*
*----------------registry database definition----------------------
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
{NULL, NULL}
};

View File

@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iommus.iso_iommu_present)
@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;
/*
* ---------------------------------------------------------------------------

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -167,25 +167,11 @@ static void RmCreateOsDescriptor(NVOS32_PARAMETERS *pApi, API_SECURITY_INFO secI
}
else if (rmStatus == NV_ERR_INVALID_ADDRESS)
{
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount,
&pPteArray, &pPageArray);
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount, &pPteArray);
if (rmStatus == NV_OK)
{
if (pPageArray != NULL)
{
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPageArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_PAGE_ARRAY;
}
else if (pPteArray != NULL)
{
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
}
else
{
NV_ASSERT_FAILED("unknown memory import type");
rmStatus = NV_ERR_NOT_SUPPORTED;
}
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
}
}
if (rmStatus != NV_OK)

View File

@ -1165,6 +1165,11 @@ NV_STATUS osIsVfioPciCorePresent(void)
return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VFIO_PCI_CORE_PRESENT);
}
NvU32 osGetGridCspSupport(void)
{
return os_get_grid_csp_support();
}
void initVGXSpecificRegistry(OBJGPU *pGpu)
{
NvU32 data32;

View File

@ -4828,6 +4828,39 @@ osRmCapInitDescriptor
*pCapDescriptor = NV_U64_MAX;
}
/*
* @brief Checks if IMEX channel support is present.
*/
NvBool
osImexChannelIsSupported(void)
{
return os_imex_channel_is_supported;
}
/*
* @brief Returns IMEX channel count.
*/
NvS32
osImexChannelCount
(
void
)
{
return os_imex_channel_count();
}
/*
* @brief Returns IMEX channel number.
*
* @param[in] descriptor OS specific descriptor to query channel number.
*
*/
NvS32
osImexChannelGet(NvU64 descriptor)
{
return os_imex_channel_get(descriptor);
}
/*
* @brief Generates random bytes which can be used as a universally unique
* identifier.
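
A minimal caller sketch for the new IMEX channel wrappers above. The call sequence and the `descriptor` and `pRmClient` locals are assumptions for illustration, not code from this commit; the commit does, however, add an `imexChannel` field to RmClient in a later hunk.

```c
/* Illustrative only: gate IMEX channel use on OS-level support before
 * querying the channel count or resolving a channel from a descriptor. */
if (osImexChannelIsSupported() && (osImexChannelCount() > 0))
{
    NvS32 channel = osImexChannelGet(descriptor); /* descriptor: assumed OS-specific handle */

    if (channel >= 0)
    {
        pRmClient->imexChannel = channel; /* NvS32 field introduced on RmClient in this change */
    }
}
```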

View File

@ -2153,6 +2153,7 @@ static NV_STATUS RmCreateMmapContextLocked(
RsClient *pClient = staticCast(pRmClient, RsClient);
KernelMemorySystem *pKernelMemorySystem = NULL;
NvBool bCoherentAtsCpuOffset = NV_FALSE;
NvBool bSriovHostCoherentFbOffset = NV_FALSE;
nv_state_t *pNv = NULL;
NvU64 addr = (NvU64)address;
NvU32 prot = 0;
@ -2200,6 +2201,8 @@ static NV_STATUS RmCreateMmapContextLocked(
pNv = NV_GET_NV_STATE(pGpu);
pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
bCoherentAtsCpuOffset = IS_COHERENT_CPU_ATS_OFFSET(pKernelMemorySystem, addr, size);
bSriovHostCoherentFbOffset = os_is_vgx_hyper() &&
IS_COHERENT_FB_OFFSET(pKernelMemorySystem, addr, size);
}
//
@ -2210,7 +2213,7 @@ static NV_STATUS RmCreateMmapContextLocked(
if ((pNv == NULL) ||
(!IS_REG_OFFSET(pNv, addr, size) &&
!IS_FB_OFFSET(pNv, addr, size) &&
!bCoherentAtsCpuOffset &&
!(bCoherentAtsCpuOffset || bSriovHostCoherentFbOffset) &&
!IS_IMEM_OFFSET(pNv, addr, size)))
{
pNv = nv_get_ctl_state();
@ -2240,6 +2243,38 @@ static NV_STATUS RmCreateMmapContextLocked(
goto done;
}
}
else if (bSriovHostCoherentFbOffset)
{
status = RmGetMmapPteArray(pKernelMemorySystem, pClient, hMemory, nvuap);
if (status != NV_OK)
{
goto done;
}
//
// nvuap->page_array(allocated in RmGetMmapPteArray) is not assigned
// to nvamc->page_array if onlining status is false(which is the case with
// bSriovHostCoherentFbOffset) and so doesn't get freed if not done here.
// The call to RmGetMmapPteArray is for getting the contig and num
// pages of the allocation.
//
os_free_mem(nvuap->page_array);
nvuap->page_array = NULL;
//
// This path is taken in the case of self-hosted SRIOV host where
// the coherent GPU memory is not onlined but the CPU mapping to
// the coherent GPU memory is done via C2C(instead of BAR1) and so
// only contig can be supported for now.
//
if (!nvuap->contig && (nvuap->num_pages > 1))
{
NV_PRINTF(LEVEL_ERROR, "Mapping of Non-contig allocation for "
"not onlined coherent GPU memory not supported\n");
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
}
if (RmSetUserMapAccessRange(nvuap) != NV_OK)
{
@ -5377,16 +5412,11 @@ NvBool rm_get_uefi_console_status(
NvU64 fbBaseAddress = 0;
NvBool bConsoleDevice = NV_FALSE;
// os_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
// UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
// driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
// it will point to BAR2 + 16MB.
os_get_screen_info(&fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
//
// nv_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc).
//
nv_get_screen_info(nv, &fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);
bConsoleDevice = (fbSize != 0);
@ -5403,16 +5433,11 @@ NvU64 rm_get_uefi_console_size(
fbSize = fbWidth = fbHeight = fbDepth = fbPitch = 0;
// os_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
// UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
// driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
// it will point to BAR2 + 16MB.
os_get_screen_info(pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
//
// nv_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc).
//
nv_get_screen_info(nv, pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);
return fbSize;
}
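
Both call sites above now follow a single pattern. A minimal sketch, assuming `nv` is a valid nv_state_t pointer: the OS layer reports the framebuffer size directly, so RM no longer derives it from height * pitch or compares the base address against BAR1 or BAR2 + 16 MB.

```c
/* Sketch of the new query pattern: the size comes back from the OS layer. */
NvU64 fbBase = 0, fbSize = 0;
NvU32 fbWidth = 0, fbHeight = 0, fbDepth = 0, fbPitch = 0;

nv_get_screen_info(nv, &fbBase, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);

/* A non-zero size is what rm_get_uefi_console_status() treats as "console present". */
NvBool bConsole = (fbSize != 0);
```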

View File

@ -582,13 +582,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *sp,
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
gpuDeviceHandle device)
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
gpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsFlushReplayableFaultBuffer(device);
rmStatus = nvGpuOpsFlushReplayableFaultBuffer(pFaultInfo, bCopyAndFlush);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
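
For reference, a hypothetical call site under the new signature shown above; `sp`, `pFaultInfo`, and the choice of flag are assumptions, not code from this commit.

```c
/* The entry point now takes the fault-info structure plus a copy-and-flush
 * flag instead of a gpuDeviceHandle. */
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, pFaultInfo, NV_TRUE /* bCopyAndFlush */);
```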

View File

@ -348,7 +348,7 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL
if ((memmgrGetReservedConsoleMemDesc(pGpu, pMemoryManager) != NULL) && bContinue)
{
NvU64 baseAddr;
NvU64 baseAddr, size;
NvU32 width, height, depth, pitch;
// There should only be one.
@ -357,9 +357,8 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL
pParams->subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
// Console is either mapped to BAR1 or BAR2 + 16 MB
os_get_screen_info(&baseAddr, &width, &height, &depth, &pitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
nv_get_screen_info(nv, &baseAddr, &width, &height, &depth,
&pitch, &size);
pParams->width = (NvU16)width;
pParams->height = (NvU16)height;

View File

@ -622,7 +622,7 @@ NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(
// Copies all valid packets in RM's and client's shadow buffer
status = kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, &threadState, faultsCopied,
NON_REPLAYABLE_FAULT_BUFFER);
NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);
threadStateFreeISRAndDeferredIntHandler(&threadState, pGpu, THREAD_STATE_FLAGS_IS_ISR);
tlsIsrDestroy(pIsrAllocator);
@ -659,7 +659,7 @@ static NV_STATUS _rm_gpu_copy_mmu_faults_unlocked(
// Copies all valid packets in RM's and client's shadow buffer
return kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, pThreadState, pFaultsCopied,
NON_REPLAYABLE_FAULT_BUFFER);
NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);
return NV_OK;
}

View File

@ -156,6 +156,7 @@ struct RmClient {
PSECURITY_TOKEN pSecurityToken;
struct UserInfo *pUserInfo;
NvBool bIsClientVirtualMode;
NvS32 imexChannel;
PNODE pCliSyncGpuBoostTree;
};

View File

@ -1136,28 +1136,6 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
pThis->__gpuUpdateErrorContainmentState__ = &gpuUpdateErrorContainmentState_c04480;
}
// Hal function -- gpuCheckEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
{
pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_TU102;
}
// default
else
{
pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_d44104;
}
// Hal function -- gpuClearEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
{
pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_TU102;
}
// default
else
{
pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_ac1694;
}
// Hal function -- gpuWaitForGfwBootComplete
if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */
{

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2004-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -973,8 +973,6 @@ struct OBJGPU {
NvBool (*__gpuIsDevModeEnabledInHw__)(struct OBJGPU *);
NvBool (*__gpuIsCtxBufAllocInPmaSupported__)(struct OBJGPU *);
NV_STATUS (*__gpuUpdateErrorContainmentState__)(struct OBJGPU *, NV_ERROR_CONT_ERR_ID, NV_ERROR_CONT_LOCATION, NvU32 *);
void (*__gpuCheckEccCounts__)(struct OBJGPU *);
NV_STATUS (*__gpuClearEccCounts__)(struct OBJGPU *);
NV_STATUS (*__gpuWaitForGfwBootComplete__)(struct OBJGPU *);
NvBool (*__gpuGetIsCmpSku__)(struct OBJGPU *);
NvBool PDB_PROP_GPU_HIGH_SPEED_BRIDGE_CONNECTED;
@ -1236,6 +1234,7 @@ struct OBJGPU {
NvBool bStateUnloading;
NvBool bStateLoaded;
NvBool bFullyConstructed;
NvBool bRecoveryMarginPresent;
NvBool bBf3WarBug4040336Enabled;
NvBool bUnifiedMemorySpaceEnabled;
NvBool bSriovEnabled;
@ -1633,10 +1632,6 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
#define gpuIsCtxBufAllocInPmaSupported_HAL(pGpu) gpuIsCtxBufAllocInPmaSupported_DISPATCH(pGpu)
#define gpuUpdateErrorContainmentState(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuUpdateErrorContainmentState_HAL(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts_HAL(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete_HAL(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuGetIsCmpSku(pGpu) gpuGetIsCmpSku_DISPATCH(pGpu)
@ -2557,6 +2552,34 @@ static inline NV_STATUS gpuSetPartitionErrorAttribution(struct OBJGPU *pGpu, NV_
#define gpuSetPartitionErrorAttribution_HAL(pGpu, arg0, arg1, arg2) gpuSetPartitionErrorAttribution(pGpu, arg0, arg1, arg2)
NV_STATUS gpuCreateRusdMemory_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuCreateRusdMemory(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuCreateRusdMemory(pGpu) gpuCreateRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
#define gpuCreateRusdMemory_HAL(pGpu) gpuCreateRusdMemory(pGpu)
NvBool gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline NvBool gpuCheckEccCounts(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_FALSE;
}
#else //__nvoc_gpu_h_disabled
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_TU102(pGpu)
#endif //__nvoc_gpu_h_disabled
#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts(pGpu)
NV_STATUS gpuConstructDeviceInfoTable_FWCLIENT(struct OBJGPU *pGpu);
NV_STATUS gpuConstructDeviceInfoTable_VGPUSTUB(struct OBJGPU *pGpu);
@ -3147,26 +3170,6 @@ static inline NV_STATUS gpuUpdateErrorContainmentState_DISPATCH(struct OBJGPU *p
return pGpu->__gpuUpdateErrorContainmentState__(pGpu, arg0, arg1, arg2);
}
static inline void gpuCheckEccCounts_d44104(struct OBJGPU *pGpu) {
return;
}
void gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);
static inline void gpuCheckEccCounts_DISPATCH(struct OBJGPU *pGpu) {
pGpu->__gpuCheckEccCounts__(pGpu);
}
static inline NV_STATUS gpuClearEccCounts_ac1694(struct OBJGPU *pGpu) {
return NV_OK;
}
NV_STATUS gpuClearEccCounts_TU102(struct OBJGPU *pGpu);
static inline NV_STATUS gpuClearEccCounts_DISPATCH(struct OBJGPU *pGpu) {
return pGpu->__gpuClearEccCounts__(pGpu);
}
NV_STATUS gpuWaitForGfwBootComplete_TU102(struct OBJGPU *pGpu);
static inline NV_STATUS gpuWaitForGfwBootComplete_5baef9(struct OBJGPU *pGpu) {
@ -4458,16 +4461,6 @@ static inline NV_STATUS gpuSanityCheckRegisterAccess(struct OBJGPU *pGpu, NvU32
#define gpuSanityCheckRegisterAccess(pGpu, addr, pRetVal) gpuSanityCheckRegisterAccess_IMPL(pGpu, addr, pRetVal)
#endif //__nvoc_gpu_h_disabled
void gpuUpdateUserSharedData_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline void gpuUpdateUserSharedData(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuUpdateUserSharedData(pGpu) gpuUpdateUserSharedData_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuValidateRegOffset_IMPL(struct OBJGPU *pGpu, NvU32 arg0);
#ifdef __nvoc_gpu_h_disabled
@ -4523,6 +4516,38 @@ static inline NV_STATUS gpuGc6Exit(struct OBJGPU *pGpu, NV2080_CTRL_GC6_EXIT_PAR
#define gpuGc6Exit(pGpu, arg0) gpuGc6Exit_IMPL(pGpu, arg0)
#endif //__nvoc_gpu_h_disabled
void gpuDestroyRusdMemory_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline void gpuDestroyRusdMemory(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuDestroyRusdMemory(pGpu) gpuDestroyRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuEnableAccounting_IMPL(struct OBJGPU *arg0);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuEnableAccounting(struct OBJGPU *arg0) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuEnableAccounting(arg0) gpuEnableAccounting_IMPL(arg0)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuDisableAccounting_IMPL(struct OBJGPU *arg0, NvBool bForce);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuDisableAccounting(struct OBJGPU *arg0, NvBool bForce) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuDisableAccounting(arg0, bForce) gpuDisableAccounting_IMPL(arg0, bForce)
#endif //__nvoc_gpu_h_disabled
#undef PRIVATE_FIELD
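
gpuCheckEccCounts() is no longer a per-chip NVOC dispatch: the header above maps it straight to gpuCheckEccCounts_TU102() and changes the return type from void to NvBool. A hedged sketch of the resulting call pattern follows; the meaning of the return value is not spelled out in this diff, so the comment does not interpret it.

```c
/* Direct call via the new macro; no __gpuCheckEccCounts__ function pointer
 * is consulted anymore. The NvBool result is simply captured here. */
NvBool eccCheckResult = gpuCheckEccCounts(pGpu);
(void)eccCheckResult; /* how callers use the result is not shown in this diff */
```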

View File

@ -2159,6 +2159,16 @@ NV_STATUS rpcMapMemoryDma_STUB(
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:CTRL_SET_VGPU_FB_USAGE - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcCtrlSetVgpuFbUsage_STUB(
POBJGPU pGpu,
POBJRPC pRpc,
void *pArg3
)
{
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:UNMAP_MEMORY_DMA - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcUnmapMemoryDma_STUB(
POBJGPU pGpu,

View File

@ -1303,17 +1303,6 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
pThis->__kbusGetEccCounts__ = &kbusGetEccCounts_4a4dee;
}
// Hal function -- kbusClearEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_GH100;
}
// default
else
{
pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_b3696a;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBus_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelBus_engstateStatePreInitLocked;

View File

@ -428,7 +428,6 @@ struct KernelBus {
void (*__kbusTeardownCoherentCpuMapping__)(struct OBJGPU *, struct KernelBus *, NvBool);
NV_STATUS (*__kbusBar1InstBlkBind__)(struct OBJGPU *, struct KernelBus *);
NvU32 (*__kbusGetEccCounts__)(struct OBJGPU *, struct KernelBus *);
void (*__kbusClearEccCounts__)(struct OBJGPU *, struct KernelBus *);
NV_STATUS (*__kbusStateInitUnlocked__)(POBJGPU, struct KernelBus *);
void (*__kbusInitMissing__)(POBJGPU, struct KernelBus *);
NV_STATUS (*__kbusStatePreInitUnlocked__)(POBJGPU, struct KernelBus *);
@ -730,8 +729,6 @@ NV_STATUS __nvoc_objCreate_KernelBus(KernelBus**, Dynamic*, NvU32);
#define kbusBar1InstBlkBind_HAL(pGpu, pKernelBus) kbusBar1InstBlkBind_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts_HAL(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts_HAL(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusStateInitUnlocked(pGpu, pEngstate) kbusStateInitUnlocked_DISPATCH(pGpu, pEngstate)
#define kbusInitMissing(pGpu, pEngstate) kbusInitMissing_DISPATCH(pGpu, pEngstate)
#define kbusStatePreInitUnlocked(pGpu, pEngstate) kbusStatePreInitUnlocked_DISPATCH(pGpu, pEngstate)
@ -2531,16 +2528,6 @@ static inline NvU32 kbusGetEccCounts_DISPATCH(struct OBJGPU *pGpu, struct Kernel
return pKernelBus->__kbusGetEccCounts__(pGpu, pKernelBus);
}
void kbusClearEccCounts_GH100(struct OBJGPU *pGpu, struct KernelBus *pKernelBus);
static inline void kbusClearEccCounts_b3696a(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
return;
}
static inline void kbusClearEccCounts_DISPATCH(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
pKernelBus->__kbusClearEccCounts__(pGpu, pKernelBus);
}
static inline NV_STATUS kbusStateInitUnlocked_DISPATCH(POBJGPU pGpu, struct KernelBus *pEngstate) {
return pEngstate->__kbusStateInitUnlocked__(pGpu, pEngstate);
}
@ -2625,6 +2612,10 @@ static inline NvBool kbusIsBarAccessBlocked(struct KernelBus *pKernelBus) {
return pKernelBus->bBarAccessBlocked;
}
static inline void kbusSetFlaSupported(struct KernelBus *pKernelBus, NvBool bSupported) {
pKernelBus->bFlaSupported = bSupported;
}
void kbusDestruct_IMPL(struct KernelBus *pKernelBus);
#define __nvoc_kbusDestruct(pKernelBus) kbusDestruct_IMPL(pKernelBus)
@ -2719,6 +2710,9 @@ static inline NV_STATUS kbusIsGpuP2pAlive(struct OBJGPU *pGpu, struct KernelBus
#define kbusIsGpuP2pAlive(pGpu, pKernelBus) kbusIsGpuP2pAlive_IMPL(pGpu, pKernelBus)
#endif //__nvoc_kern_bus_h_disabled
NV_STATUS kbusUpdateRusdStatistics_IMPL(struct OBJGPU *pGpu);
#define kbusUpdateRusdStatistics(pGpu) kbusUpdateRusdStatistics_IMPL(pGpu)
void kbusDetermineBar1Force64KBMapping_IMPL(struct KernelBus *pKernelBus);
#ifdef __nvoc_kern_bus_h_disabled
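
Two small helpers appear in this hunk; below is a usage sketch with assumed call sites. "RUSD" is read here as the RM user shared data surfaced elsewhere in this commit via gpuCreateRusdMemory()/gpuDestroyRusdMemory(), which is an inference rather than something the diff states.

```c
/* Illustrative only: record FLA support on the KernelBus object, then
 * refresh the RUSD bus statistics for the GPU. */
kbusSetFlaSupported(pKernelBus, NV_TRUE);

NV_STATUS status = kbusUpdateRusdStatistics(pGpu);
if (status != NV_OK)
{
    /* Hypothetical handling; real call sites are not shown in this diff. */
}
```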

Some files were not shown because too many files have changed in this diff.