535.54.03

Andy Ritger 2023-06-14 12:37:59 -07:00
parent eb5c7665a1
commit 26458140be
No known key found for this signature in database
GPG Key ID: 6D466BB75E006CFC
120 changed files with 83370 additions and 81507 deletions

View File

@ -2,6 +2,8 @@
## Release 535 Entries
### [535.54.03] 2023-06-14
### [535.43.02] 2023-05-30
#### Fixed

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.43.02.
version 535.54.03.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.43.02 driver release. This can be achieved by installing
535.54.03 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.43.02 release,
(see the table below). However, in the 535.54.03 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.54.03/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -664,6 +664,7 @@ Subsystem Device ID.
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.54.03\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@ -510,6 +510,12 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};
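
As a hedged illustration (not part of this commit), a file-release path could use the new flag to skip RMAPI teardown entirely when nothing was ever allocated on the fd; the helper name below is hypothetical:

    static void example_nvfp_release(nv_file_private_t *nvfp)
    {
        // Only run RMAPI cleanup if a client or an event was ever
        // allocated on this fd (see bCleanupRmapi above).
        if (nvfp->bCleanupRmapi)
            hypothetical_rmapi_cleanup(nvfp); // hypothetical helper
    }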
// Forward define the gpu ops structures
@ -959,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);

View File

@ -1455,12 +1455,12 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
concurrently with the same UvmCslContext parameter in different threads. The caller must
guarantee this exclusion.
* nvUvmInterfaceCslLogDeviceEncryption
* nvUvmInterfaceCslRotateIv
* nvUvmInterfaceCslEncrypt
* nvUvmInterfaceCslDecrypt
* nvUvmInterfaceCslSign
* nvUvmInterfaceCslQueryMessagePool
* nvUvmInterfaceCslIncrementIv
*/
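
A minimal sketch of the caller-side exclusion required above, modeled on the uvm_conf_computing.c hunks later in this commit, which serialize CSL calls with a per-channel lock:

    // Inside a function that owns 'channel' (uvm_channel_t *) and 'iv' (UvmCslIv *):
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          1,
                                          iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);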
/*******************************************************************************
@ -1495,62 +1495,17 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
*/
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption
Returns an IV that can be later used in the nvUvmInterfaceCslEncrypt
method. The IV contains a "freshness bit" which value is set by this method
and subsequently dirtied by nvUvmInterfaceCslEncrypt to prevent
non-malicious reuse of the IV.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
encryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslEncrypt.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - New IV would cause a counter to overflow.
*/
NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv);
/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption
Logs and checks information about device encryption.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
decryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslDecrypt.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv);
/*******************************************************************************
nvUvmInterfaceCslRotateIv
Rotates the IV for a given channel and direction.
Rotates the IV for a given channel and operation.
This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with direction equal to
UVM_CSL_DIR_GPU_TO_CPU. Similiarly, outstanding messages that have been
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with direction equal to UVM_CSL_DIR_CPU_TO_GPU. For a given direction
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
@ -1559,17 +1514,17 @@ NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either
- UVM_CSL_DIR_CPU_TO_GPU
- UVM_CSL_DIR_GPU_TO_CPU
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
to overflow.
NV_ERR_INVALID_ARGUMENT - Invalid value for direction.
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
*/
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction);
UvmCslOperation operation);
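
A short usage sketch, assuming uvmCslContext is an initialized context and the caller already provides the exclusion described above; the overflow handling is illustrative only:

    NV_STATUS status = nvUvmInterfaceCslRotateIv(uvmCslContext,
                                                 UVM_CSL_OPERATION_DECRYPT);
    if (status == NV_ERR_INSUFFICIENT_RESOURCES) {
        // Rotating would overflow a message counter; handling is
        // caller-specific (sketch only).
    }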
/*******************************************************************************
nvUvmInterfaceCslEncrypt
@ -1580,7 +1535,7 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
this function produces undefined behavior. Performance is typically
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
The encryptIV can be obtained from nvUvmInterfaceCslAcquireEncryptionIv.
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
However, it is optional. If it is NULL, the next IV in line will be used.
See "CSL Interface and Locking" for locking requirements.
@ -1623,12 +1578,18 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input and output buffers in
units of bytes. Value can range from 1 byte
to (2^32) - 1 bytes.
decryptIv[IN] - Parameter given by nvUvmInterfaceCslLogDeviceEncryption.
bufferSize[IN] - Size of the input and output buffers in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
calculate the authentication tag. Can be NULL.
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
This parameter is ignored if addAuthData is NULL.
authTagBuffer[IN] - Address of authentication tag buffer.
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
@ -1643,6 +1604,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
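
For reference, the uvm_conf_computing.c hunk later in this commit passes NULL and 0 for the new additional authenticated data parameters; a sketch of such a call, with the channel and buffer names assumed:

    status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
                                      size,
                                      (const NvU8 *)src_cipher,
                                      src_iv,
                                      (NvU8 *)dst_plain,
                                      NULL, // addAuthData: none
                                      0,    // addAuthDataSize: ignored
                                      (const NvU8 *)auth_tag_buffer);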
/*******************************************************************************
@ -1673,7 +1636,6 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer);
/*******************************************************************************
nvUvmInterfaceCslQueryMessagePool
@ -1684,14 +1646,45 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either UVM_CSL_DIR_CPU_TO_GPU or UVM_CSL_DIR_GPU_TO_CPU.
operation[IN] - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
messageNum[OUT] - Number of messages left before overflow.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the direction parameter is illegal.
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
*/
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum);
/*******************************************************************************
nvUvmInterfaceCslIncrementIv
Increments the message counter by the specified amount.
If iv is non-NULL then the incremented value is returned.
If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
the returned IV can be used in nvUvmInterfaceCslDecrypt.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
increment[IN] - The amount by which the IV is incremented. Can be 0.
iv[OUT] - If non-NULL, a buffer to store the incremented IV.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
in an overflow.
*/
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv);
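
The uvm_conf_computing.c changes in this commit call this with an increment of 1 to reserve the next IV; a hedged encrypt-side sketch, assuming uvmCslContext is an initialized context:

    UvmCslIv encryptIv;
    NV_STATUS status = nvUvmInterfaceCslIncrementIv(uvmCslContext,
                                                    UVM_CSL_OPERATION_ENCRYPT,
                                                    1,
                                                    &encryptIv);
    // On NV_OK the returned IV has its "freshness" bit set and may be
    // passed as the optional encryptIv argument of nvUvmInterfaceCslEncrypt.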
#endif // _NV_UVM_INTERFACE_H_

View File

@ -286,6 +286,7 @@ typedef struct UvmGpuChannelInfo_tag
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@ -1060,10 +1061,10 @@ typedef struct UvmCslIv
NvU8 fresh;
} UvmCslIv;
typedef enum UvmCslDirection
typedef enum UvmCslOperation
{
UVM_CSL_DIR_CPU_TO_GPU,
UVM_CSL_DIR_GPU_TO_CPU
} UvmCslDirection;
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
#endif // _NV_UVM_TYPES_H_

View File

@ -103,13 +103,12 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_acquire_encryption_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
#endif

View File

@ -919,6 +919,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_MIGRATION_OPS_PRESENT" "" "types"
;;
vfio_precopy_info)
#
# Determine if vfio_precopy_info struct is present or not
#
# Added by commit 4db52602a6074 ("vfio: Extend the device migration
# protocol with PRE_COPY") in v6.2
#
CODE="
#include <linux/vfio.h>
struct vfio_precopy_info precopy_info;
"
compile_check_conftest "$CODE" "NV_VFIO_PRECOPY_INFO_PRESENT" "" "types"
;;
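
Presumably the generated NV_VFIO_PRECOPY_INFO_PRESENT define is then consumed elsewhere in the kernel interface layer (not shown in this commit) along these lines:

    #if defined(NV_VFIO_PRECOPY_INFO_PRESENT)
        // Kernel provides struct vfio_precopy_info (v6.2+): PRE_COPY
        // migration paths can reference it.
    #else
        // Older kernel: avoid referencing struct vfio_precopy_info.
    #endif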
vfio_log_ops)
#
# Determine if vfio_log_ops struct is present or not

View File

@ -179,6 +179,7 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
return drm_gem_handle_create(filp, &nv_gem->base, handle);
}
#if defined(NV_DRM_FENCE_AVAILABLE)
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
@ -187,6 +188,7 @@ static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
#endif
}
#endif
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,

View File

@ -338,11 +338,6 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
return NV_OK;
}
if (!gpu->parent->ce_hal->memcopy_is_valid(&push, dst, src)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
// The input virtual addresses exist in UVM's internal address space, not
// the proxy address space
if (uvm_channel_is_proxy(push.channel)) {
@ -401,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
bool is_proxy_va_space = false;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
@ -437,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
}
}
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
if (uvm_conf_computing_mode_enabled(gpu)) {
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[0],
gpu_addresses[0],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);
}
}
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the previous cases are valid.
// TODO: Bug 3839176: the test partially waived on Confidential
// Computing because it assumes that GPU can access system memory
// without using encryption.
goto done;
}
// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
@ -448,37 +471,22 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_addresses[2] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the following cases are
// valid.
//
// TODO: Bug 3839176: the test partially waived on
// Confidential Computing because it assumes that GPU can
// access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu) &&
!(gpu_addresses[k].is_unprotected && gpu_addresses[j].is_unprotected)) {
continue;
}
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],
gpu_addresses[j],

View File

@ -750,9 +750,9 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wlc_channel->channel_info.workSubmissionToken);
}
static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@ -765,10 +765,211 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_enc_auth_tag;
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
// WLC submissions are done under channel lock, so there should be no
// contention to get the right submission order.
UVM_ASSERT(push->channel->conf_computing.gpu_put == old_cpu_put);
// This can never stall or return an error. WLC launch after the WLC channels
// are initialized uses private static PB space; it neither needs the general
// PB space nor counts towards the maximum number of concurrent pushes.
status = uvm_push_begin_on_reserved_channel(push->launch_channel,
&indirect_push,
"Worklaunch to '%s' via '%s'",
push->channel->name,
push->launch_channel->name);
UVM_ASSERT(status == NV_OK);
// Move over the pushbuffer data
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);
uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);
gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
gpfifo_entry,
sizeof(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
push->channel->channel_info.workSubmissionToken);
// Ignore return value of push_wait. It can only fail with channel error
// which will be detected when waiting for the primary push.
(void)uvm_push_end_and_wait(&indirect_push);
push->channel->conf_computing.gpu_put = new_gpu_put;
}
static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 new_gpu_put)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpput_auth_tag_cpu, *gpput_enc_cpu;
uvm_gpu_address_t gpput_auth_tag_gpu, gpput_enc_gpu;
NvU32 gpput_scratchpad[UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT/sizeof(new_gpu_put)];
UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));
gpput_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpput_enc_gpu);
gpput_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);
// Update GPPUT. The update needs a 4B write to a specific offset, but
// SEC2 can only perform 16B-aligned decrypt writes.
// A poison value is written to all other locations; it is ignored in
// most locations and overwritten by HW for the GPGET location.
memset(gpput_scratchpad, 0, sizeof(gpput_scratchpad));
UVM_ASSERT(sizeof(*gpput_scratchpad) == sizeof(new_gpu_put));
gpput_scratchpad[(channel->channel_info.gpPutGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;
// Set the value of GPGET to be the same as GPPUT. It will be overwritten
// by HW the next time the GET value changes. UVM never reads GPGET.
// However, RM does read it when freeing a channel. When this function
// is called from 'channel_manager_stop_wlc' we set the value of GPGET
// to the same value as GPPUT; a mismatch between these two values makes
// RM wait for any "pending" tasks, leading to significant delays in the
// channel teardown sequence.
UVM_ASSERT(channel->channel_info.gpPutGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT ==
channel->channel_info.gpGetGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
gpput_scratchpad[(channel->channel_info.gpGetGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;
uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpput_enc_cpu,
gpput_scratchpad,
NULL,
sizeof(gpput_scratchpad),
gpput_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(channel->channel_info.gpPutGpuVa,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpput_enc_gpu.address,
sizeof(gpput_scratchpad),
gpput_auth_tag_gpu.address);
}
static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 put, NvU64 value)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpfifo_auth_tag_cpu, *gpfifo_enc_cpu;
uvm_gpu_address_t gpfifo_auth_tag_gpu, gpfifo_enc_gpu;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + put * sizeof(value);
NvU64 gpfifo_scratchpad[2];
UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));
gpfifo_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
sizeof(gpfifo_scratchpad),
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpfifo_enc_gpu);
gpfifo_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpfifo_auth_tag_gpu);
if (IS_ALIGNED(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT)) {
gpfifo_scratchpad[0] = value;
// Set the value of the odd entry to noop.
// It will be overwritten when the next entry is submitted.
gpu->parent->host_hal->set_gpfifo_noop(&gpfifo_scratchpad[1]);
}
else {
uvm_gpfifo_entry_t *previous_gpfifo;
UVM_ASSERT(put > 0);
previous_gpfifo = &channel->gpfifo_entries[put - 1];
if (previous_gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
// used only in the static WLC schedule.
// Overwriting the previous entry with the same value doesn't hurt,
// whether the previous entry has been processed or not.
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_scratchpad[0],
prev_pb_va,
previous_gpfifo->pushbuffer_size,
UVM_GPFIFO_SYNC_PROCEED);
}
gpfifo_scratchpad[1] = value;
}
uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpfifo_enc_cpu,
gpfifo_scratchpad,
NULL,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpfifo_enc_gpu.address,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_gpu.address);
}
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
NV_STATUS status;
NvU64 gpfifo_entry;
void *push_enc_cpu = uvm_pushbuffer_get_unprotected_cpu_va_for_push(pushbuffer, push);
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_auth_tag_cpu;
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
// If the old_cpu_put is not equal to the last gpu put, other pushes are
// pending that need to be submitted. That push/es' submission will update
@ -790,60 +991,36 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
// Move over the pushbuffer data
if (uvm_channel_is_sec2(indirect_push.channel)) {
push_enc_auth_tag = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_enc_auth_tag_gpu);
}
else {
// Auth tags cannot be in protected vidmem.
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);
}
push_auth_tag_cpu = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_auth_tag_gpu);
uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);
push_auth_tag_cpu);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_sec2(indirect_push.channel)) {
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_enc_auth_tag_gpu.address);
}
else {
gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);
}
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_auth_tag_gpu.address);
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);
// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// We could use a single 64 bit release if it were available
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);
set_gpfifo_via_sec2(&indirect_push, push->channel, old_cpu_put, gpfifo_entry);
update_gpput_via_sec2(&indirect_push, push->channel, new_gpu_put);
// Ring the doorbell
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
@ -930,11 +1107,7 @@ void uvm_channel_end_push(uvm_push_t *push)
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);
// This codepath should only be used during initialization and thus
// NEVER return an error.
UVM_ASSERT(status == NV_OK);
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
@ -963,7 +1136,7 @@ void uvm_channel_end_push(uvm_push_t *push)
wmb();
if (needs_sec2_work_submit) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);
NV_STATUS status = internal_channel_submit_work_indirect_sec2(push, cpu_put, new_cpu_put);
// This codepath should only be used during initialization and thus
// NEVER return an error.
@ -1007,7 +1180,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(channel->pool->manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));
UVM_ASSERT(!uvm_channel_is_sec2(channel));
@ -1026,17 +1198,26 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
if (status != NV_OK)
return status;
// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// One 64bit semahore release can be used instead once implemented.
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(entry->control_value));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(entry->control_value));
if (uvm_channel_is_sec2(indirect_push.channel)) {
set_gpfifo_via_sec2(&indirect_push, channel, old_cpu_put, entry->control_value);
update_gpput_via_sec2(&indirect_push, channel, new_gpu_put);
} else {
uvm_gpu_t *gpu = uvm_push_get_gpu(&indirect_push);
NvU64 gpfifo_gpu_va = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));
gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
entry->control_value,
sizeof(entry->control_value));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
}
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
do_semaphore_release(&indirect_push,
channel->channel_info.workSubmissionOffsetGpuVa,
channel->channel_info.workSubmissionToken);
status = uvm_push_end_and_wait(&indirect_push);
if (status != NV_OK)
@ -1044,9 +1225,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
channel->conf_computing.gpu_put = new_gpu_put;
// The above SEC2 work transferred everything
// Ring the doorbell
UVM_GPU_WRITE_ONCE(*channel->channel_info.workSubmissionOffset, channel->channel_info.workSubmissionToken);
return NV_OK;
}
@ -1445,17 +1623,21 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
PAGE_SIZE,
&channel->conf_computing.static_pb_unprotected_sysmem);
if (status != NV_OK)
return status;
// Both pushes will be targets for SEC2 decrypt operations and have to
// be aligned for SEC2. The first push location will also be a target
// for CE decrypt operation and has to be aligned for CE decrypt.
status = uvm_rm_mem_alloc(gpu,
UVM_RM_MEM_TYPE_GPU,
UVM_MAX_WLC_PUSH_SIZE * 2,
UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT) * 2,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&channel->conf_computing.static_pb_protected_vidmem);
if (status != NV_OK)
@ -1464,16 +1646,16 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
channel->conf_computing.static_pb_unprotected_sysmem_cpu =
uvm_rm_mem_get_cpu_va(channel->conf_computing.static_pb_unprotected_sysmem);
channel->conf_computing.static_pb_unprotected_sysmem_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + UVM_MAX_WLC_PUSH_SIZE;
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + aligned_wlc_push_size;
// The location below is only used for launch pushes but reuses
// the same sysmem allocation
channel->conf_computing.launch_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
channel->conf_computing.launch_auth_tag_gpu_va =
uvm_rm_mem_get_gpu_uvm_va(channel->conf_computing.static_pb_unprotected_sysmem, gpu) +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
channel->conf_computing.static_pb_protected_sysmem = uvm_kvmalloc(UVM_MAX_WLC_PUSH_SIZE + UVM_PAGE_SIZE_4K);
if (!channel->conf_computing.static_pb_protected_sysmem)
@ -2576,7 +2758,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
// "decrypt_push" represents WLC decrypt push, constructed using fake_push.
// Copied to wlc_pb_base + UVM_MAX_WLC_PUSH_SIZE, as the second of the two
// pushes that make the WLC fixed schedule.
NvU64 decrypt_push_protected_gpu = protected_vidmem + UVM_MAX_WLC_PUSH_SIZE;
NvU64 decrypt_push_protected_gpu = UVM_ALIGN_UP(protected_vidmem + UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT);
NvU64 decrypt_push_unprotected_gpu = unprotected_sysmem_gpu + gpfifo_size;
void *decrypt_push_unprotected_cpu = (char*)gpfifo_unprotected_cpu + gpfifo_size;
@ -2587,7 +2769,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
BUILD_BUG_ON(sizeof(*wlc_gpfifo_entries) != sizeof(*wlc->channel_info.gpFifoEntries));
UVM_ASSERT(uvm_channel_is_wlc(wlc));
UVM_ASSERT(tag_offset == UVM_MAX_WLC_PUSH_SIZE);
UVM_ASSERT(tag_offset == UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
// The WLC schedule consists of two parts, so the number of entries needs to
// be even. This also guarantees that the size is 16B-aligned.
@ -2692,11 +2874,9 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
// Prime the WLC by setting "PUT" two steps ahead. Reuse the current
// cpu_put value that was used during channel initialization.
// Don't update wlc->cpu_put, it will be used to track
// submitted pushes as any other channel.
do_semaphore_release(&sec2_push,
wlc->channel_info.gpPutGpuVa,
(wlc->cpu_put + 2) % wlc->num_gpfifo_entries);
// Don't update wlc->cpu_put, it will be used to track submitted pushes
// as any other channel.
update_gpput_via_sec2(&sec2_push, wlc, (wlc->cpu_put + 2) % wlc->num_gpfifo_entries);
status = uvm_push_end_and_wait(&sec2_push);
@ -3048,9 +3228,7 @@ static void channel_manager_stop_wlc(uvm_channel_manager_t *manager)
// Every gpfifo entry advances the gpu put of WLC by two so the current
// value is: (cpu_put * 2) % num_gpfifo_entries and it's ahead of the
// get pointer by 2.
do_semaphore_release(&push,
channel->channel_info.gpPutGpuVa,
(channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
update_gpput_via_sec2(&push, channel, (channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
}
status = uvm_push_end_and_wait(&push);

View File

@ -378,11 +378,12 @@ void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
NV_STATUS status;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslLogDeviceEncryption(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}
@ -391,11 +392,12 @@ void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *
NV_STATUS status;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslAcquireEncryptionIv(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}
@ -439,6 +441,8 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
(const NvU8 *) src_cipher,
src_iv,
(NvU8 *) dst_plain,
NULL,
0,
(const NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);

View File

@ -42,9 +42,11 @@
// Use sizeof(UvmCslIv) to refer to the IV size.
#define UVM_CONF_COMPUTING_IV_ALIGNMENT 16
// SEC2 decrypt operation buffers are required to be 16-bytes aligned. CE
// encrypt/decrypt can be unaligned if the buffer lies in a single 32B segment.
// Otherwise, they need to be 32B aligned.
// SEC2 decrypt operation buffers are required to be 16-bytes aligned.
#define UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT 16
// CE encrypt/decrypt can be unaligned if the entire buffer lies in a single
// 32B segment. Otherwise, it needs to be 32B aligned.
#define UVM_CONF_COMPUTING_BUF_ALIGNMENT 32
#define UVM_CONF_COMPUTING_DMA_BUFFER_SIZE UVM_VA_BLOCK_SIZE
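
As a hedged illustration of the CE rule above (the helper below is not part of this file), a buffer may skip the 32B alignment requirement only when it lies entirely within one 32B segment:

    static inline NvBool buffer_fits_one_32b_segment(NvU64 addr, NvU64 size)
    {
        // True when [addr, addr + size) does not cross a 32B boundary
        // (size is assumed to be non-zero).
        return (addr / UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
               ((addr + size - 1) / UVM_CONF_COMPUTING_BUF_ALIGNMENT);
    }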

View File

@ -2575,7 +2575,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
continue;
}
if (folio_test_swapcache(page_folio(src_page))) {
if (PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
if (service_context) {

View File

@ -166,6 +166,7 @@ void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, N
NvU32 *csl_sign_init = push->next;
// Check that the provided alignment matches HW
BUILD_BUG_ON(UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT != (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT < (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT % (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)) != 0);

View File

@ -161,22 +161,22 @@
// * WFI: 8B
// Total: 64B
//
// Push space needed for secure work launch is 224B. The push is constructed
// Push space needed for secure work launch is 364B. The push is constructed
// in 'internal_channel_submit_work_indirect' and 'uvm_channel_end_push'
// * CE decrypt (of indirect PB): 56B
// * 2*semaphore release (indirect GPFIFO entry): 2*24B
// * memset_8 (indirect GPFIFO entry): 44B
// * semaphore release (indirect GPPUT): 24B
// * semaphore release (indirect doorbell): 24B
// Appendix added in 'uvm_channel_end_push':
// * semaphore release (WLC tracking): 168B
// * semaphore increment (memcopy): 24B
// * semaphore release (payload): 24B
// * notifier memset: 40B
// * payload encryption: 64B
// * notifier memset: 40B
// * semaphore increment (LCIC GPPUT): 24B
// * semaphore release (LCIC doorbell): 24B
// Total: 368B
#define UVM_MAX_WLC_PUSH_SIZE (368)
// Total: 364B
#define UVM_MAX_WLC_PUSH_SIZE (364)
// Push space needed for static LCIC schedule, as initialized in
// 'setup_lcic_schedule':
@ -184,7 +184,7 @@
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (LCIC tracking): 160B
// * semaphore increment (memcopy): 24B
// * semaphore increment (payload): 24B
// * notifier memcopy: 36B
// * payload encryption: 64B
// * notifier memcopy: 36B

View File

@ -213,6 +213,7 @@ done:
typedef enum
{
MEM_ALLOC_TYPE_SYSMEM_DMA,
MEM_ALLOC_TYPE_SYSMEM_PROTECTED,
MEM_ALLOC_TYPE_VIDMEM_PROTECTED
} mem_alloc_type_t;
@ -274,7 +275,11 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
}
else {
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
if (type == MEM_ALLOC_TYPE_SYSMEM_DMA)
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
else
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, NULL, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
write_range_cpu(*mem, size, 0xdeaddead);
@ -405,48 +410,6 @@ static void gpu_decrypt(uvm_push_t *push,
}
}
// This test only uses sysmem so that we can use the CPU for encryption and SEC2
// for decryption, i.e., the test doesn't depend on any other GPU engine for
// the encryption operation (refer to test_cpu_to_gpu_roundtrip()). This is not
// how SEC2 is used in the driver. The intended SEC2 usage is to decrypt from
// unprotected sysmem to protected vidmem, which is tested in
// test_cpu_to_gpu_roundtrip().
static NV_STATUS test_cpu_to_gpu_sysmem(uvm_gpu_t *gpu, size_t copy_size, size_t size)
{
NV_STATUS status = NV_OK;
uvm_mem_t *src_plain = NULL;
uvm_mem_t *cipher = NULL;
uvm_mem_t *dst_plain = NULL;
uvm_mem_t *auth_tag_mem = NULL;
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
write_range_cpu(dst_plain, size, 0xA5A5A5A5);
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_SEC2, &push, "enc(cpu)_dec(gpu)"), out);
cpu_encrypt(push.channel, cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, cipher, auth_tag_mem, size, copy_size);
uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(mem_match(src_plain, dst_plain), out);
out:
uvm_mem_free(auth_tag_mem);
uvm_mem_free(cipher);
uvm_mem_free(dst_plain);
uvm_mem_free(src_plain);
return status;
}
// This test depends on the CE for the encryption, so we assume tests from
// uvm_ce_test.c have successfully passed.
static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, size_t size)
@ -461,19 +424,16 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
UvmCslIv *decrypt_iv;
uvm_tracker_t tracker;
decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv)
return NV_ERR_NO_MEMORY;
uvm_tracker_init(&tracker);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_VIDMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
@ -483,14 +443,13 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, src_cipher, auth_tag_mem, size, copy_size);
uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&tracker,
&push,
"enc(gpu)_dec(cpu)"),
// Wait for SEC2 before launching the CE part.
// SEC2 is only allowed to release semaphores in unprotected sysmem,
// and CE can only acquire semaphores in protected vidmem.
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "enc(gpu)_dec(cpu)"),
out);
gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);
@ -521,8 +480,6 @@ out:
uvm_kvfree(decrypt_iv);
uvm_tracker_deinit(&tracker);
return status;
}
@ -545,7 +502,6 @@ static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu)
UVM_ASSERT(size % copy_sizes[i] == 0);
TEST_NV_CHECK_RET(test_cpu_to_gpu_sysmem(gpu, copy_sizes[i], size));
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu, copy_sizes[i], size));
}

View File

@ -69,6 +69,14 @@ static NV_STATUS test_tracker_completion(uvm_va_space_t *va_space)
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore mechanism
// to all semaphores.
// This test allocates a semaphore in vidmem and then releases it from the
// CPU. SEC2 channels cannot target semaphores in vidmem. Moreover, the CPU
// cannot directly release values to vidmem for CE channels.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
TEST_NV_CHECK_RET(uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &sema));
uvm_tracker_init(&tracker);

View File

@ -7189,6 +7189,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
}
static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
const uvm_va_policy_t *policy,
uvm_processor_id_t id,
uvm_processor_mask_t *allowed_mask)
@ -7200,7 +7201,10 @@ static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_processor_mask_zero(allowed_mask);
uvm_processor_mask_set(allowed_mask, policy->preferred_location);
}
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) || uvm_id_equal(policy->preferred_location, id)) &&
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
(uvm_id_equal(policy->preferred_location, id) &&
!is_uvm_fault_force_sysmem_set() &&
!uvm_hmm_must_use_sysmem(block, va_block_context))) &&
uvm_va_space_processor_has_memory(va_space, id)) {
// When operating under read-duplication we should only map the local
// processor to cause fault-and-duplicate of remote pages.
@ -7285,7 +7289,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context->policy, id, &allowed_destinations);
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);
for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {

View File

@ -418,15 +418,6 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_global_processor_mask_t retained_gpus;
LIST_HEAD(deferred_free_list);
// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);
// Remove the VA space from the global list before we start tearing things
// down so other threads can't see the VA space in a partially-valid state.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@ -532,7 +523,14 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_deferred_free_object_list(&deferred_free_list);
// MM FD teardown should already have destroyed va_space_mm
// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);
UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));
uvm_mutex_lock(&g_uvm_global.global_lock);

View File

@ -1396,6 +1396,8 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
nv->flags |= NV_FLAG_OPEN;
rm_request_dnotifier_state(sp, nv);
/*
* Now that RM init is done, allow dynamic power to control the GPU in FINE
* mode, if enabled. (If the mode is COARSE, this unref will do nothing

View File

@ -290,10 +290,6 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslLogDeviceEncryption(struct ccslContext_t *ctx,
NvU8 *decryptIv);
NV_STATUS nvGpuOpsCcslAcquireEncryptionIv(struct ccslContext_t *ctx,
NvU8 *encryptIv);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@ -312,6 +308,8 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NvU32 bufferSize,
@ -320,5 +318,9 @@ NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 *messageNum);
NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
#endif /* _NV_GPU_OPS_H_*/

View File

@ -1504,44 +1504,18 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);
NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_log_device_encryption(sp, uvmCslContext->ctx, (NvU8 *)decryptIv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslLogDeviceEncryption);
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction)
UvmCslOperation operation)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, direction);
status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, operation);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslRotateIv);
NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
BUILD_BUG_ON(NV_OFFSETOF(UvmCslIv, fresh) != sizeof(encryptIv->iv));
status = rm_gpu_ops_ccsl_acquire_encryption_iv(sp, uvmCslContext->ctx, (NvU8*)encryptIv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslAcquireEncryptionIv);
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
@ -1566,6 +1540,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer)
{
NV_STATUS status;
@ -1577,6 +1553,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
inputBuffer,
(NvU8 *)decryptIv,
outputBuffer,
addAuthData,
addAuthDataSize,
authTagBuffer);
return status;
@ -1598,18 +1576,32 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
EXPORT_SYMBOL(nvUvmInterfaceCslSign);
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, direction, messageNum);
status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, operation, messageNum);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslQueryMessagePool);
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_increment_iv(sp, uvmCslContext->ctx, operation, increment, (NvU8 *)iv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslIncrementIv);
#else // NV_UVM_ENABLE
NV_STATUS nv_uvm_suspend(void)

View File

@ -30,6 +30,7 @@
#define GPS_FUNC_SUPPORT 0x00000000 // Bit list of supported functions
#define GPS_FUNC_GETOBJBYTYPE 0x00000010 // Fetch any specific Object by Type
#define GPS_FUNC_GETALLOBJS 0x00000011 // Fetch all Objects
#define GPS_FUNC_REQUESTDXSTATE 0x00000012 // Request D-Notifier state
#define GPS_FUNC_GETCALLBACKS 0x00000013 // Get system requested callbacks
#define GPS_FUNC_PCONTROL 0x0000001C // GPU power control function
#define GPS_FUNC_PSHARESTATUS 0x00000020 // Get system requested Power Steering settings

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r535_87
#define NV_BUILD_BRANCH r536_08
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r535_87
#define NV_PUBLIC_BRANCH r536_08
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r535_87-122"
#define NV_BUILD_CHANGELIST_NUM (32882771)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r536_08-145"
#define NV_BUILD_CHANGELIST_NUM (32940552)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r535_87-122"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32882771)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r536_08-145"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32940552)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r535_87-4"
#define NV_BUILD_CHANGELIST_NUM (32875904)
#define NV_BUILD_BRANCH_VERSION "r536_08-4"
#define NV_BUILD_CHANGELIST_NUM (32940552)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "535.93"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32875904)
#define NV_BUILD_NAME "536.19"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32940552)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.43.02"
#define NV_VERSION_STRING "535.54.03"
#else

View File

@ -120,12 +120,14 @@ struct _NVLOG_BUFFER
#if !PORT_IS_KERNEL_BUILD
typedef struct PORT_SPINLOCK PORT_SPINLOCK;
typedef struct PORT_MUTEX PORT_MUTEX;
#else
#include "nvport/nvport.h"
#endif
#elif !defined(PORT_IS_KERNEL_BUILD)
typedef struct PORT_SPINLOCK PORT_SPINLOCK;
typedef struct PORT_MUTEX PORT_MUTEX;
#else
#include "nvport/nvport.h"
#endif
@ -143,11 +145,33 @@ typedef struct _NVLOG_LOGGER
NvU32 nextFree;
/** Total number of free buffer slots */
NvU32 totalFree;
/** Lock for all buffer operations */
/** Lock for some buffer operations */
PORT_SPINLOCK* mainLock;
/** Lock for creating/deleting pBuffers and accessing them from RmCtrls */
PORT_MUTEX* buffersLock;
} NVLOG_LOGGER;
extern NVLOG_LOGGER NvLogLogger;
/**
* NvLog uses two locks:
* - NVLOG_LOGGER::mainLock is used to protect some accesses to pBuffers, or
* an individual pBuffers entry depending on locking flags.
* - NVLOG_LOGGER::buffersLock is used to protect creating/deleting pBuffers and accessing them
* from certain RmCtrl handlers.
*
* Historically, in most contexts, obtaining the RMAPI lock would suffice, and mainLock would
* optionally be used for certain buffers. The NV_ESC_RM_NVLOG_CTRL ioctl cannot take the RMAPI
* lock but needs to access NvLog. That access might race if it happens at an inopportune time:
* e.g. if the ioctl is called during RM init while KGSP creates/deletes the GSP NvLog buffers.
* Using buffersLock is thus necessary to resolve the potential race.
*
* This leads to an unfortunate sequence where mainLock and buffersLock are nested. The latter lock
* cannot be removed as it is used in IRQ paths.
*
* This should be refactored to use a single RWLock that does conditional acquire in possible IRQ
* paths.
*/
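Not part of the change: a minimal sketch of the nesting described above, assuming the usual nvport sync entry points; the mutex is taken first because a mutex may sleep and therefore cannot be acquired while the spinlock is held.
portSyncMutexAcquire(NvLogLogger.buffersLock);    // outer: guards pBuffers create/delete
portSyncSpinlockAcquire(NvLogLogger.mainLock);    // inner: guards the buffer contents
/* ... read or write the selected pBuffers entry ... */
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);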
//
// Buffer flags
//

View File

@ -39,8 +39,12 @@
#define NV_EP_PCFG_GPU_REVISION_ID_AND_CLASSCODE_BASE_CLASSCODE 31:24 /* R-IVF */
#define NV_EP_PCFG_GPU_REVISION_ID_AND_CLASSCODE_BASE_CLASSCODE_3D 0x00000003 /* R-I-V */
#define NV_EP_PCFG_GPU_BARREG0 0x00000010 /* RW-4R */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE 2:1 /* R-IVF */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE_32BIT 0x00000000 /* R-I-V */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE_64BIT 0x00000002 /* R---V */
#define NV_EP_PCFG_GPU_BARREG0_REG_BASE_ADDRESS 31:18 /* RWIVF */
#define NV_EP_PCFG_GPU_BARREG0_REG_BASE_ADDRESS_INIT 0x00000000 /* RWI-V */
#define NV_EP_PCFG_GPU_BARREG5 0x00000024 /* RW-4R */
#define NV_EP_PCFG_GPU_SUBSYSTEM_ID 0x0000002C /* R--4R */
#define NV_EP_PCFG_GPU_MSI_64_HEADER 0x00000048 /* RW-4R */
#define NV_EP_PCFG_GPU_MSI_64_HEADER_MSI_ENABLE 16:16 /* RWIVF */
@ -79,6 +83,11 @@
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_REPLAY_NUM_ROLLOVER 8:8 /* RWCVF */
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_REPLAY_TIMER_TIMEOUT 12:12 /* RWCVF */
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_ADVISORY_NON_FATAL_ERROR 13:13 /* RWCVF */
#define NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF 0x0000025C /* R--4R */
#define NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF_TOTAL_VFS 31:16 /* R-EVF */
#define NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE 0x00000264 /* R--4R */
#define NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE_FIRST_VF_OFFSET 15:0 /* R-IVF */
#define NV_EP_PCFG_GPU_VF_BAR0 0x00000274 /* RW-4R */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC 0x000002B4 /* R--4R */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC_FAULT_FUSE_POD 0:0 /* R-CVF */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC_FAULT_FUSE_SCPM 1:1 /* R-CVF */
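Not part of the change: a short sketch of how these manual-style HI:LO bit-field defines are typically consumed, assuming the DRF helpers from nvmisc.h used throughout this tree; regVal is a hypothetical config-space read.
NvU32 regVal   = 0x00200000;   // hypothetical value read from offset 0x25C
NvU32 totalVfs = DRF_VAL(_EP_PCFG_GPU, _SRIOV_INIT_TOT_VF, _TOTAL_VFS, regVal);
// Plain-C form of the same 31:16 extraction; both yield 0x20 (32 VFs).
NvU32 totalVfsPlain = (regVal >> 16) & 0xFFFF;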

View File

@ -23,9 +23,18 @@
#ifndef __tu102_dev_nv_xve_h__
#define __tu102_dev_nv_xve_h__
#define NV_PCFG 0x00088FFF:0x00088000 /* RW--D */
#define NV_PCFG 0x00088FFF:0x00088000 /* RW--D */
#define NV_XVE_MSIX_CAP_HDR 0x000000C8 /* RW-4R */
#define NV_XVE_MSIX_CAP_HDR_ENABLE 31:31 /* RWIVF */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_ENABLED 0x00000001 /* RW--V */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_DISABLED 0x00000000 /* RWI-V */
#define NV_XVE_SRIOV_CAP_HDR3 0x00000BD8 /* R--4R */
#define NV_XVE_SRIOV_CAP_HDR3_TOTAL_VFS 31:16 /* R-EVF */
#define NV_XVE_SRIOV_CAP_HDR5 0x00000BE0 /* R--4R */
#define NV_XVE_SRIOV_CAP_HDR5_FIRST_VF_OFFSET 15:0 /* R-IVF */
#define NV_XVE_SRIOV_CAP_HDR9 0x00000BF0 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR10 0x00000BF4 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR11_VF_BAR1_HI 0x00000BF8 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR12 0x00000BFC /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR13_VF_BAR2_HI 0x00000C00 /* RW-4R */
#endif // __tu102_dev_nv_xve_h__

View File

@ -1505,7 +1505,7 @@ nvswitch_reset_and_drain_links_ls10
// DEBUG_CLEAR (0x144) register
// - Assert NPortWarmReset[i] using the WARMRESET (0x140) register
//
// nvswitch_soe_issue_nport_reset_ls10(device, link);
nvswitch_soe_issue_nport_reset_ls10(device, link);
//
// Step 5.0 : Issue Minion request to perform the link reset sequence
@ -1583,7 +1583,7 @@ nvswitch_reset_and_drain_links_ls10
// - Assert NPORT INITIALIZATION and program the state tracking RAMS
// - Restore NPORT state after reset
//
// nvswitch_soe_restore_nport_state_ls10(device, link);
nvswitch_soe_restore_nport_state_ls10(device, link);
// Step 7.0 : Re-program the routing table for DBEs

View File

@ -72,6 +72,19 @@
* identity mapped. To use this feature, users need to pass in the
* hVaspace with identity mapped addresses for the entire memory during
* construct.
* PIPELINED
* This flag allows the copy/memset operation to be pipelined with previous DMA operations on the same channel.
* It means that its reads/writes are allowed to happen before the writes of preceding operations are TLB-acked.
* The flag can be useful when dealing with non-intersecting async operations,
* but it can result in races when two async CE operations target the same allocation and the second operation uses the flag.
* Race example:
* 1. async copy A -> B
* 2. pipelined copy B -> C
* Here copy 2 can read B before copy 1 finishes writing it, which will result in C containing invalid data.
* Technical details:
* By default, the first _LAUNCH_DMA method of a CE operation is marked as _TRANSFER_TYPE_NON_PIPELINED, which the flag overrides.
* Subsequent _LAUNCH_DMA methods belonging to the same operation use _TRANSFER_TYPE_PIPELINED, as each of these methods should
* target different addresses
*
* submittedWorkId [OUT]
* The work submission token users can poll on to wait for work
@ -79,8 +92,9 @@
*/
#define NV0050_CTRL_MEMSET_FLAGS_DEFAULT 0
#define NV0050_CTRL_MEMSET_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMSET_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMSET_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMSET_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMSET_FLAGS_PIPELINED NVBIT(2)
#define NV0050_CTRL_CMD_MEMSET (0x500101U) /* finn: Evaluated from "(FINN_NV_CE_UTILS_UTILS_INTERFACE_ID << 8) | NV0050_CTRL_MEMSET_PARAMS_MESSAGE_ID" */
@ -98,7 +112,7 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
/*
* NV0050_CTRL_CMD_MEMCOPY
*
* Copies from a source memory to a destination memory and releases a semaphore
* Copies from a source memory to a destination memory and releases a semaphore
* on completion
*
* hDstMemory [IN]
@ -131,6 +145,19 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
* identity mapped. To use this feature, users need to pass in the
* hVaspace with identity mapped addresses for the entire memory during
* construct.
* PIPELINED
* This flag allows the copy/memset operation to be pipelined with previous dma operations on the same channel
* It means that its reads/writes are allowed to happen before the writes of preceding operations are TLB-acked.
* The flag can be useful when dealing with non-intersecting async operations,
* but it can result in races when two async CE operations target the same allocation and the second operation uses the flag.
* Race example:
* 1. async copy A -> B
* 2. pipelined copy B -> C
* Here copy 2 can read B before copy 1 finishes writing it, which will result in C containing invalid data.
* Technical details:
* By default, the first _LAUNCH_DMA method of a CE operation is marked as _TRANSFER_TYPE_NON_PIPELINED, which the flag overrides.
* Subsequent _LAUNCH_DMA methods belonging to the same operation use _TRANSFER_TYPE_PIPELINED, as each of these methods should
* target different addresses
*
* submittedWorkId [OUT]
* The work submission token users can poll on to wait for work
@ -138,8 +165,9 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
*/
#define NV0050_CTRL_MEMCOPY_FLAGS_DEFAULT 0
#define NV0050_CTRL_MEMCOPY_FLAGS_ASYNC NVBIT(1)
#define NV0050_CTRL_MEMCOPY_FLAGS_VIRTUAL NVBIT(2)
#define NV0050_CTRL_MEMCOPY_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMCOPY_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED NVBIT(2)
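Not part of the change: a minimal sketch of flag selection for the race described in the comment above; only the flag names defined in this header are used, everything else is an assumption.
// Copy 1 (A -> B) and copy 2 (B -> C) overlap on B, so copy 2 must not be pipelined:
NvU32 copy1Flags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC;
NvU32 copy2Flags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC;   // no _PIPELINED: wait for copy 1's writes
// An async copy that does not intersect any in-flight operation can add the hint:
NvU32 independentFlags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC | NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED;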
#define NV0050_CTRL_CMD_MEMCOPY (0x500102U) /* finn: Evaluated from "(FINN_NV_CE_UTILS_UTILS_INTERFACE_ID << 8 | NV0050_CTRL_MEMCOPY_PARAMS_MESSAGE_ID)" */
@ -157,12 +185,12 @@ typedef struct NV0050_CTRL_MEMCOPY_PARAMS {
/*
* NV0050_CTRL_CMD_CHECK_PROGRESS
*
*
* Check if a previously submitted work item has been completed by HW.
*
* submittedWorkId [IN]
* The work submission token users can poll on to wait for work
* completed by CE.
* completed by CE.
*
*/
#define NV0050_CTRL_CHECK_PROGRESS_RESULT_DEFAULT 0

View File

@ -208,11 +208,11 @@ typedef struct NV2080_CTRL_EVENT_SET_SEMAPHORE_MEMORY_PARAMS {
* guestMSIData
* This parameter indicates the MSI data set by the guest OS.
*
* vmIdType
* This parameter specifies the type of guest virtual machine identifier
* vgpuUuid
* This parameter specifies the uuid of vGPU assigned to VM.
*
* guestVmId
* This parameter specifies the guest virtual machine identifier
* domainId
* This parameter specifies the unique guest virtual machine identifier
*
* Possible status values returned are:
* NV_OK
@ -225,11 +225,11 @@ typedef struct NV2080_CTRL_EVENT_SET_SEMAPHORE_MEMORY_PARAMS {
typedef struct NV2080_CTRL_EVENT_SET_GUEST_MSI_PARAMS {
NV_DECLARE_ALIGNED(NvU64 guestMSIAddr, 8);
NvU32 guestMSIData;
NvHandle hSemMemory;
NvBool isReset;
VM_ID_TYPE vmIdType;
NV_DECLARE_ALIGNED(VM_ID guestVmId, 8);
NvU32 guestMSIData;
NvHandle hSemMemory;
NvBool isReset;
NvU8 vgpuUuid[VM_UUID_SIZE];
NV_DECLARE_ALIGNED(NvU64 domainId, 8);
} NV2080_CTRL_EVENT_SET_GUEST_MSI_PARAMS;

View File

@ -233,6 +233,13 @@ NvBool nvValidateSetLutCommonParams(
const NVDevEvoRec *pDevEvo,
const struct NvKmsSetLutCommonParams *pParams);
NvBool nvChooseColorRangeEvo(
enum NvKmsOutputTf tf,
const enum NvKmsDpyAttributeColorRangeValue requestedColorRange,
const enum NvKmsDpyAttributeCurrentColorSpaceValue colorSpace,
const enum NvKmsDpyAttributeColorBpcValue colorBpc,
enum NvKmsDpyAttributeColorRangeValue *pColorRange);
NvBool nvChooseCurrentColorSpaceAndRangeEvo(
const NVDpyEvoRec *pDpyEvo,
enum NvYuv420Mode yuv420Mode,

View File

@ -2111,6 +2111,38 @@ NvBool nvGetDefaultColorSpace(
return FALSE;
}
NvBool nvChooseColorRangeEvo(
enum NvKmsOutputTf tf,
const enum NvKmsDpyAttributeColorRangeValue requestedColorRange,
const enum NvKmsDpyAttributeCurrentColorSpaceValue colorSpace,
const enum NvKmsDpyAttributeColorBpcValue colorBpc,
enum NvKmsDpyAttributeColorRangeValue *pColorRange)
{
/* Hardware supports BPC_6 only for RGB */
nvAssert((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) ||
(colorBpc != NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6));
if ((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) &&
(colorBpc == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6)) {
/* At depth 18 only RGB and full range are allowed */
if (tf == NVKMS_OUTPUT_TF_PQ) {
/* NVKMS_OUTPUT_TF_PQ requires limited color range */
return FALSE;
}
*pColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL;
} else if ((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr444) ||
(colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr422) ||
(colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr420) ||
(tf == NVKMS_OUTPUT_TF_PQ)) {
/* Both YUV and NVKMS_OUTPUT_TF_PQ require a limited color range. */
*pColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_LIMITED;
} else {
*pColorRange = requestedColorRange;
}
return TRUE;
}
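Not part of the change: a brief usage sketch of the helper above for the rejected PQ + RGB 6 bpc case; the local variables are the only assumptions.
enum NvKmsDpyAttributeColorRangeValue colorRange;
// PQ requires limited range, but RGB at 6 bpc allows only full range,
// so the helper rejects this combination:
NvBool ok = nvChooseColorRangeEvo(NVKMS_OUTPUT_TF_PQ,
                                  NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL,
                                  NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB,
                                  NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6,
                                  &colorRange);   // ok == FALSE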
/*!
* Choose current colorSpace and colorRange for the given dpy based on
* the dpy's color format capabilities, the given modeset parameters (YUV420
@ -2206,23 +2238,9 @@ NvBool nvChooseCurrentColorSpaceAndRangeEvo(
}
}
/* Hardware supports BPC_6 only for RGB */
nvAssert((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) ||
(newColorBpc != NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6));
/*
* Both YUV and NVKMS_OUTPUT_TF_PQ require a limited color range.
*/
if ((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr444) ||
(newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr422) ||
(newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr420) ||
(tf == NVKMS_OUTPUT_TF_PQ)) {
newColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_LIMITED;
} else if ((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) &&
(newColorBpc == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6)) {
/* At depth 18 only RGB and full range are allowed */
newColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL;
} else {
newColorRange = requestedColorRange;
if (!nvChooseColorRangeEvo(tf, requestedColorRange, newColorSpace,
newColorBpc, &newColorRange)) {
return FALSE;
}
*pCurrentColorSpace = newColorSpace;

View File

@ -146,31 +146,23 @@ static NvBool UpdateProposedFlipStateOneApiHead(
if (!nvIsHDRCapableHead(pDispEvo, apiHead)) {
return FALSE;
}
/* NVKMS_OUTPUT_TF_PQ requires the RGB color space */
if (pProposedApiHead->hdr.colorSpace !=
NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) {
return FALSE;
}
}
if (!nvChooseCurrentColorSpaceAndRangeEvo(pDpyEvo,
pApiHeadState->timings.yuv420Mode,
pParams->tf.val,
pDpyEvo->requestedColorSpace,
pDpyEvo->requestedColorRange,
&pProposedApiHead->hdr.colorSpace,
&pProposedApiHead->hdr.colorBpc,
&pProposedApiHead->hdr.colorRange)) {
if (!nvChooseColorRangeEvo(pParams->tf.val,
pDpyEvo->requestedColorRange,
pProposedApiHead->hdr.colorSpace,
pProposedApiHead->hdr.colorBpc,
&pProposedApiHead->hdr.colorRange)) {
return FALSE;
}
}
/*
* Changes in colorSpace and colorBpc are not handled here. For DisplayPort,
* colorSpace and colorBpc cannot be changed without a modeset.
*/
if ((pProposedApiHead->hdr.colorSpace !=
pApiHeadState->attributes.colorSpace) ||
(pProposedApiHead->hdr.colorBpc !=
pApiHeadState->attributes.colorBpc)) {
return FALSE;
}
if (pParams->viewPortIn.specified) {
pProposedApiHead->dirty.viewPortPointIn = TRUE;
pProposedApiHead->viewPortPointIn = pParams->viewPortIn.point;

View File

@ -691,6 +691,10 @@ static void SetHdmiAudioMute(const NVDispEvoRec *pDispEvo,
static void EnableHdmiAudio(const NVDispEvoRec *pDispEvo,
const NvU32 head, const NvBool enable)
{
/*
* XXX Is it correct to use pktType_GeneralControl to mute/unmute
* the audio? pktType_GeneralControl controls both the audio and video data.
*/
static const NvU8 InfoframeMutePacket[] = {
pktType_GeneralControl, 0, 0, HDMI_GENCTRL_PACKET_MUTE_ENABLE, 0, 0, 0, 0,
0, 0
@ -998,12 +1002,13 @@ void nvHdmiDpConstructHeadAudioState(const NvU32 displayId,
return;
}
pAudioState->isAudioOverHdmi = nvDpyIsHdmiEvo(pDpyEvo);
if (FillELDBuffer(displayId,
nvConnectorUsesDPLib(pDpyEvo->pConnectorEvo),
&pDpyEvo->parsedEdid,
&pAudioState->eld,
&pAudioState->maxFreqSupported)) {
pAudioState->isAudioOverHdmi = nvDpyIsHdmiEvo(pDpyEvo);
pAudioState->enabled = TRUE;
}
}
@ -1197,37 +1202,25 @@ void nvHdmiDpEnableDisableAudio(const NVDispEvoRec *pDispEvo,
return;
}
if (!pHeadState->audio.enabled) {
if (enable) {
/* Make sure to remove corresponding audio device */
if (!enable) {
/*
* This is the pre-modeset code path. If the audio device is enabled
* (pHeadState->audio.enabled == TRUE), invalidate the ELD buffer
* before disabling audio.
*/
if (pHeadState->audio.enabled) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
nvDpyIdToNvU32(pConnectorEvo->displayId),
pHeadState->activeRmId,
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_POWER_ON_RESET);
} else {
/* Do nothing. The audio device is already in the disabled state. */
NV_ELD_PRE_MODESET);
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, FALSE /* enable */);
}
}
return;
}
/* Invalidate ELD buffer before disabling audio */
if (!enable) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_PRE_MODESET);
}
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, enable);
}
if (pHeadState->audio.isAudioOverHdmi) {
@ -1236,15 +1229,34 @@ void nvHdmiDpEnableDisableAudio(const NVDispEvoRec *pDispEvo,
SendHdmiGcp(pDispEvo, head, !enable /* avmute */);
}
/* Populate ELD buffer after enabling audio */
if (enable) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
pHeadState->audio.maxFreqSupported,
&pHeadState->audio.eld,
NV_ELD_POST_MODESET);
/*
* This is the post-modeset code path. If the audio device is enabled
* (pHeadState->audio.enabled == TRUE), populate the ELD buffer after
* enabling audio; otherwise make sure to remove the corresponding audio
* device.
*/
if (pHeadState->audio.enabled) {
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, TRUE /* enable */);
}
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
pHeadState->audio.maxFreqSupported,
&pHeadState->audio.eld,
NV_ELD_POST_MODESET);
} else {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
nvDpyIdToNvU32(pConnectorEvo->displayId),
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_POWER_ON_RESET);
}
}
}

View File

@ -0,0 +1,43 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#if !defined(NV_IOCTL_NVLOG)
#define NV_IOCTL_NVLOG
#include <nvtypes.h>
#include "ctrl/ctrl0000/ctrl0000nvd.h"
typedef struct
{
NvU32 ctrl; // in
NvU32 status; // out
union // in/out
{
NV0000_CTRL_NVD_GET_NVLOG_INFO_PARAMS getNvlogInfo;
NV0000_CTRL_NVD_GET_NVLOG_BUFFER_INFO_PARAMS getNvlogBufferInfo;
NV0000_CTRL_NVD_GET_NVLOG_PARAMS getNvlog;
} params;
} NV_NVLOG_CTRL_PARAMS;
#endif
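Not part of the change: a minimal sketch of how a caller might fill the new structure before issuing the NV_ESC_RM_NVLOG_CTRL (0x5F) escape on the control device; the escape plumbing itself is omitted.
NV_NVLOG_CTRL_PARAMS params = {0};
params.ctrl = NV0000_CTRL_CMD_NVD_GET_NVLOG_INFO;   // select the sub-command
/* ... issue the escape, then check params.status and params.params.getNvlogInfo ... */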

View File

@ -510,6 +510,12 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};
// Forward define the gpu ops structures
@ -959,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);

View File

@ -50,5 +50,6 @@
#define NV_ESC_RM_EXPORT_OBJECT_TO_FD 0x5C
#define NV_ESC_RM_IMPORT_OBJECT_FROM_FD 0x5D
#define NV_ESC_RM_UPDATE_DEVICE_MAPPING_INFO 0x5E
#define NV_ESC_RM_NVLOG_CTRL 0x5F
#endif // NV_ESCAPE_H_INCLUDED

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -130,6 +130,8 @@ NV_STATUS RmInitX86EmuState(OBJGPU *);
void RmFreeX86EmuState(OBJGPU *);
NV_STATUS RmPowerSourceChangeEvent(nv_state_t *, NvU32);
void RmRequestDNotifierState(nv_state_t *);
const NvU8 *RmGetGpuUuidRaw(nv_state_t *);
NV_STATUS nv_vbios_call(OBJGPU *, NvU32 *, NvU32 *);

View File

@ -2177,6 +2177,7 @@ RmPowerManagementInternal(
//
RmPowerSourceChangeEvent(nv, !ac_plugged);
}
RmRequestDNotifierState(nv);
}
break;

View File

@ -46,6 +46,10 @@
#include <class/cl003e.h> // NV01_MEMORY_SYSTEM
#include <class/cl0071.h> // NV01_MEMORY_SYSTEM_OS_DESCRIPTOR
#include "rmapi/client_resource.h"
#include "nvlog/nvlog.h"
#include <nv-ioctl-nvlog.h>
#include <ctrl/ctrl00fd.h>
#define NV_CTL_DEVICE_ONLY(nv) \
@ -839,6 +843,40 @@ NV_STATUS RmIoctl(
break;
}
case NV_ESC_RM_NVLOG_CTRL:
{
NV_NVLOG_CTRL_PARAMS *pParams = data;
NV_CTL_DEVICE_ONLY(nv);
if (!osIsAdministrator())
{
rmStatus = NV_ERR_INSUFFICIENT_PERMISSIONS;
pParams->status = rmStatus;
goto done;
}
switch (pParams->ctrl)
{
// Do not use NVOC _DISPATCH here as it dereferences NULL RmClientResource*
case NV0000_CTRL_CMD_NVD_GET_NVLOG_INFO:
rmStatus = cliresCtrlCmdNvdGetNvlogInfo_IMPL(NULL, &pParams->params.getNvlogInfo);
break;
case NV0000_CTRL_CMD_NVD_GET_NVLOG_BUFFER_INFO:
rmStatus = cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL(NULL, &pParams->params.getNvlogBufferInfo);
break;
case NV0000_CTRL_CMD_NVD_GET_NVLOG:
rmStatus = cliresCtrlCmdNvdGetNvlog_IMPL(NULL, &pParams->params.getNvlog);
break;
default:
rmStatus = NV_ERR_NOT_SUPPORTED;
break;
}
pParams->status = rmStatus;
goto done;
}
case NV_ESC_REGISTER_FD:
{
nv_ioctl_register_fd_t *params = data;

View File

@ -5415,6 +5415,14 @@ osDmabufIsSupported(void)
return os_dma_buf_enabled;
}
void osAllocatedRmClient(void *pOsInfo)
{
nv_file_private_t* nvfp = (nv_file_private_t*)pOsInfo;
if (nvfp != NULL)
nvfp->bCleanupRmapi = NV_TRUE;
}
NV_STATUS
osGetEgmInfo
(

View File

@ -31,6 +31,7 @@
#include <class/cl0000.h>
#include <rmosxfac.h> // Declares RmInitRm().
#include "gpu/gpu.h"
#include "gps.h"
#include <osfuncs.h>
#include <platform/chipset/chipset.h>
@ -86,6 +87,13 @@
#include "gpu/bus/kern_bus.h"
//
// If a timer callback arrives while PM resume is in progress, it can't be
// serviced and the timer needs to be rescheduled. This value controls how
// long the reschedule is deferred.
//
#define TIMER_RESCHED_TIME_DURING_PM_RESUME_NS (100 * 1000 * 1000)
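For reference, a quick check of the constant above:
// 100 * 1000 * 1000 ns = 100,000,000 ns = 100 ms reschedule delay
NvU64 reschedMs = TIMER_RESCHED_TIME_DURING_PM_RESUME_NS / 1000000;   // 100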
//
// Helper function which can be called before doing any RM control
// This function:
@ -499,6 +507,8 @@ done:
new_event->active = NV_TRUE;
new_event->refcount = 0;
nvfp->bCleanupRmapi = NV_TRUE;
NV_PRINTF(LEVEL_INFO, "allocated OS event:\n");
NV_PRINTF(LEVEL_INFO, " hParent: 0x%x\n", hParent);
NV_PRINTF(LEVEL_INFO, " fd: %d\n", fd);
@ -1158,12 +1168,47 @@ NV_STATUS RmPowerSourceChangeEvent(
&params, sizeof(params));
}
/*!
* @brief Function to request the latest D-Notifier status from the SBIOS.
*
* Handles scenarios (such as a fresh boot or a suspend/resume of the
* system) in which RM is not available to receive the Dx notifiers.
* This function fetches the latest D-Notifier status from the SBIOS
* once RM is ready to receive and handle those events, using the
* GPS_FUNC_REQUESTDXSTATE subfunction to query the current Dx state.
*
* @param[in] pNv nv_state_t pointer.
*/
void RmRequestDNotifierState(
nv_state_t *pNv
)
{
OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
NvU32 supportedFuncs = 0;
NvU16 dsmDataSize = sizeof(supportedFuncs);
NV_STATUS status = NV_OK;
status = osCallACPI_DSM(pGpu, ACPI_DSM_FUNCTION_GPS_2X,
GPS_FUNC_REQUESTDXSTATE, &supportedFuncs,
&dsmDataSize);
if (status != NV_OK)
{
//
// The call to the 'GPS_FUNC_REQUESTDXSTATE' subfunction may fail if the
// SBIOS/EC does not have the corresponding implementation.
//
NV_PRINTF(LEVEL_INFO,
"%s: Failed to request Dx event update, status 0x%x\n",
__FUNCTION__, status);
}
}
/*!
* @brief Deal with D-notifier events to apply a performance
* level based on the requested auxiliary power-state.
* Read confluence page "D-Notifiers on Linux" for more details.
*
* @param[in] pGpu OBJGPU pointer.
* @param[in] pNv nv_state_t pointer.
* @param[in] event_type NvU32 Event type.
*/
static void RmHandleDNotifierEvent(
@ -2551,6 +2596,16 @@ void NV_API_CALL rm_cleanup_file_private(
OBJSYS *pSys = SYS_GET_INSTANCE();
NV_ENTER_RM_RUNTIME(sp,fp);
//
// Skip cleaning up this fd if:
// - no RMAPI clients and events were ever allocated on this fd
// - no RMAPI object handles were exported on this fd
// Access nvfp->handles without locking as fd cleanup is synchronised by the kernel
//
if (!nvfp->bCleanupRmapi && nvfp->handles == NULL)
goto done;
pRmApi = rmapiGetInterface(RMAPI_EXTERNAL);
threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateSetTimeoutOverride(&threadState, 10 * 1000);
@ -2600,6 +2655,7 @@ void NV_API_CALL rm_cleanup_file_private(
rmapiEpilogue(pRmApi, &rmApiContext);
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
done:
if (nvfp->ctl_nvfp != NULL)
{
nv_put_file_private(nvfp->ctl_nvfp_priv);
@ -3018,14 +3074,16 @@ static NV_STATUS RmRunNanoTimerCallback(
if ((status = rmGpuLocksAcquire(GPU_LOCK_FLAGS_COND_ACQUIRE, RM_LOCK_MODULES_TMR)) != NV_OK)
{
TMR_EVENT *pEvent = (TMR_EVENT *)pTmrEvent;
NvU64 timeNs = pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) ?
TIMER_RESCHED_TIME_DURING_PM_RESUME_NS :
osGetTickResolution();
//
// We failed to acquire the lock - depending on what's holding it,
// the lock could be held for a while, so try again soon, but not too
// soon to prevent the owner from making forward progress indefinitely.
//
return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->pOSTmrCBdata,
osGetTickResolution());
return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->pOSTmrCBdata, timeNs);
}
threadStateInitISRAndDeferredIntHandler(&threadState, pGpu,
@ -3062,7 +3120,7 @@ NV_STATUS NV_API_CALL rm_run_nano_timer_callback
if (pGpu == NULL)
return NV_ERR_GENERIC;
if (!FULL_GPU_SANITY_CHECK(pGpu))
if (!FULL_GPU_SANITY_FOR_PM_RESUME(pGpu))
{
return NV_ERR_GENERIC;
}
@ -4059,6 +4117,48 @@ void NV_API_CALL rm_power_source_change_event(
NV_EXIT_RM_RUNTIME(sp,fp);
}
void NV_API_CALL rm_request_dnotifier_state(
nv_stack_t *sp,
nv_state_t *pNv
)
{
nv_priv_t *nvp = NV_GET_NV_PRIV(pNv);
if (nvp->b_mobile_config_enabled)
{
THREAD_STATE_NODE threadState;
void *fp;
GPU_MASK gpuMask;
NV_ENTER_RM_RUNTIME(sp,fp);
threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
// LOCK: acquire API lock
if ((rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_ACPI)) == NV_OK)
{
OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
// LOCK: acquire per device lock
if ((pGpu != NULL) &&
((rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_ACPI,
&gpuMask)) == NV_OK))
{
RmRequestDNotifierState(pNv);
// UNLOCK: release per device lock
rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
}
// UNLOCK: release API lock
rmapiLockRelease();
}
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
NV_EXIT_RM_RUNTIME(sp,fp);
}
}
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages(
nvidia_stack_t *sp,
nv_dma_device_t *peer,

View File

@ -72,6 +72,7 @@ osCreateMemFromOsDescriptor
void *pPrivate;
pClient = serverutilGetClientUnderLock(hClient);
if ((pDescriptor == NvP64_NULL) ||
(*pLimit == 0) ||
(pClient == NULL))
@ -362,6 +363,23 @@ osCheckGpuBarsOverlapAddrRange
return NV_OK;
}
static NvU64
_doWarBug4040336
(
OBJGPU *pGpu,
NvU64 addr
)
{
if (gpuIsWarBug4040336Enabled(pGpu))
{
if ((addr & 0xffffffff00000000ULL) == 0x7fff00000000ULL)
{
addr = addr & 0xffffffffULL;
}
}
return addr;
}
static NV_STATUS
osCreateOsDescriptorFromIoMemory
(
@ -440,6 +458,14 @@ osCreateOsDescriptorFromIoMemory
return rmStatus;
}
//
// BF3's PCIe MMIO bus address at 0x800000000000 (CPU PA 0x7fff00000000) is
// too high for Ampere to address. As a result, BF3's bus address is
// moved to < 4GB. The CPU PA and the bus address are therefore no longer 1:1
// and need to be adjusted.
//
*base = _doWarBug4040336(pGpu, *base);
rmStatus = memdescCreate(ppMemDesc, pGpu, (*pLimit + 1), 0,
NV_MEMORY_CONTIGUOUS, ADDR_SYSMEM,
NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
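Not part of the change: a self-contained arithmetic check of the masking applied by _doWarBug4040336, using a hypothetical BF3 CPU physical address.
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t addr = 0x7fff12345678ULL;                        /* hypothetical CPU PA */
    if ((addr & 0xffffffff00000000ULL) == 0x7fff00000000ULL)  /* same test as the WAR */
        addr &= 0xffffffffULL;                                /* keep only the low 32 bits */
    printf("0x%llx\n", (unsigned long long)addr);             /* prints 0x12345678 */
    return 0;
}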

View File

@ -869,30 +869,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *sp,
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_acquire_encryption_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 *encryptIv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslAcquireEncryptionIv(ctx, encryptIv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 *decryptIv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslLogDeviceEncryption(ctx, decryptIv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 direction)
@ -942,12 +918,15 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *sp,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagData)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, outputBuffer, authTagData);
rmStatus = nvGpuOpsCcslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, outputBuffer,
addAuthData, addAuthDataSize, authTagData);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
@ -979,3 +958,17 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *sp,
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsIncrementIv(ctx, direction, increment, iv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}

View File

@ -41,6 +41,7 @@
--undefined=rm_release_all_gpus_lock
--undefined=rm_shutdown_rm
--undefined=rm_power_source_change_event
--undefined=rm_request_dnotifier_state
--undefined=rm_write_registry_binary
--undefined=rm_write_registry_dword
--undefined=rm_write_registry_string
@ -121,16 +122,15 @@
--undefined=rm_gpu_ops_paging_channel_push_stream
--undefined=rm_gpu_ops_device_create
--undefined=rm_gpu_ops_device_destroy
--undefined=rm_gpu_ops_ccsl_acquire_encryption_iv
--undefined=rm_gpu_ops_ccsl_sign
--undefined=rm_gpu_ops_ccsl_encrypt
--undefined=rm_gpu_ops_ccsl_encrypt_with_iv
--undefined=rm_gpu_ops_ccsl_context_init
--undefined=rm_gpu_ops_ccsl_context_clear
--undefined=rm_gpu_ops_ccsl_log_device_encryption
--undefined=rm_gpu_ops_ccsl_rotate_iv
--undefined=rm_gpu_ops_ccsl_decrypt
--undefined=rm_gpu_ops_ccsl_query_message_pool
--undefined=rm_gpu_ops_ccsl_increment_iv
--undefined=rm_log_gpu_crash
--undefined=rm_kernel_rmapi_op
--undefined=nv_get_hypervisor_type

View File

@ -0,0 +1,88 @@
#define NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#include "nvoc/runtime.h"
#include "nvoc/rtti.h"
#include "nvtypes.h"
#include "nvport/nvport.h"
#include "nvport/inline/util_valist.h"
#include "utils/nvassert.h"
#include "g_ccsl_nvoc.h"
#ifdef DEBUG
char __nvoc_class_id_uniqueness_check_0x9bf1a1 = 1;
#endif
extern const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl;
void __nvoc_init_Ccsl(Ccsl*);
void __nvoc_init_funcTable_Ccsl(Ccsl*);
NV_STATUS __nvoc_ctor_Ccsl(Ccsl*);
void __nvoc_init_dataField_Ccsl(Ccsl*);
void __nvoc_dtor_Ccsl(Ccsl*);
extern const struct NVOC_EXPORT_INFO __nvoc_export_info_Ccsl;
static const struct NVOC_RTTI __nvoc_rtti_Ccsl_Ccsl = {
/*pClassDef=*/ &__nvoc_class_def_Ccsl,
/*dtor=*/ (NVOC_DYNAMIC_DTOR) &__nvoc_dtor_Ccsl,
/*offset=*/ 0,
};
static const struct NVOC_CASTINFO __nvoc_castinfo_Ccsl = {
/*numRelatives=*/ 1,
/*relatives=*/ {
&__nvoc_rtti_Ccsl_Ccsl,
},
};
// Not instantiable because it's not derived from class "Object"
const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl =
{
/*classInfo=*/ {
/*size=*/ sizeof(Ccsl),
/*classId=*/ classId(Ccsl),
/*providerId=*/ &__nvoc_rtti_provider,
#if NV_PRINTF_STRINGS_ALLOWED
/*name=*/ "Ccsl",
#endif
},
/*objCreatefn=*/ (NVOC_DYNAMIC_OBJ_CREATE) NULL,
/*pCastInfo=*/ &__nvoc_castinfo_Ccsl,
/*pExportInfo=*/ &__nvoc_export_info_Ccsl
};
const struct NVOC_EXPORT_INFO __nvoc_export_info_Ccsl =
{
/*numEntries=*/ 0,
/*pExportEntries=*/ 0
};
void __nvoc_dtor_Ccsl(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
void __nvoc_init_dataField_Ccsl(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
NV_STATUS __nvoc_ctor_Ccsl(Ccsl *pThis) {
NV_STATUS status = NV_OK;
__nvoc_init_dataField_Ccsl(pThis);
goto __nvoc_ctor_Ccsl_exit; // Success
__nvoc_ctor_Ccsl_exit:
return status;
}
static void __nvoc_init_funcTable_Ccsl_1(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
void __nvoc_init_funcTable_Ccsl(Ccsl *pThis) {
__nvoc_init_funcTable_Ccsl_1(pThis);
}
void __nvoc_init_Ccsl(Ccsl *pThis) {
pThis->__nvoc_pbase_Ccsl = pThis;
__nvoc_init_funcTable_Ccsl(pThis);
}

View File

@ -0,0 +1,189 @@
#ifndef _G_CCSL_NVOC_H_
#define _G_CCSL_NVOC_H_
#include "nvoc/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "g_ccsl_nvoc.h"
#ifndef CCSL_H
#define CCSL_H
#include "nvstatus.h"
#include "nvmisc.h"
#include "kernel/gpu/conf_compute/conf_compute.h"
#define CCSL_DIR_HOST_TO_DEVICE 0
#define CCSL_DIR_DEVICE_TO_HOST 1
struct ccslContext_t
{
NvHandle hClient;
NvHandle hChannel;
enum {CSL_MSG_CTR_32, CSL_MSG_CTR_64} msgCounterSize;
NvU8 keyIn[CC_AES_256_GCM_KEY_SIZE_BYTES];
union
{
struct
{
NvU8 ivIn[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskIn[CC_AES_256_GCM_IV_SIZE_BYTES];
};
NvU8 nonce[CC_HMAC_NONCE_SIZE_BYTES];
};
NvU8 keyOut[CC_AES_256_GCM_KEY_SIZE_BYTES];
NvU8 ivOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU64 keyHandleIn;
NvU64 keyHandleOut;
};
typedef struct ccslContext_t *pCcslContext;
/****************************************************************************\
* *
* CCSL module header. *
* *
****************************************************************************/
#ifdef NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#define PRIVATE_FIELD(x) x
#else
#define PRIVATE_FIELD(x) NVOC_PRIVATE_FIELD(x)
#endif
struct Ccsl {
const struct NVOC_RTTI *__nvoc_rtti;
struct Ccsl *__nvoc_pbase_Ccsl;
};
#ifndef __NVOC_CLASS_Ccsl_TYPEDEF__
#define __NVOC_CLASS_Ccsl_TYPEDEF__
typedef struct Ccsl Ccsl;
#endif /* __NVOC_CLASS_Ccsl_TYPEDEF__ */
#ifndef __nvoc_class_id_Ccsl
#define __nvoc_class_id_Ccsl 0x9bf1a1
#endif /* __nvoc_class_id_Ccsl */
extern const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl;
#define __staticCast_Ccsl(pThis) \
((pThis)->__nvoc_pbase_Ccsl)
#ifdef __nvoc_ccsl_h_disabled
#define __dynamicCast_Ccsl(pThis) ((Ccsl*)NULL)
#else //__nvoc_ccsl_h_disabled
#define __dynamicCast_Ccsl(pThis) \
((Ccsl*)__nvoc_dynamicCast(staticCast((pThis), Dynamic), classInfo(Ccsl)))
#endif //__nvoc_ccsl_h_disabled
NV_STATUS __nvoc_objCreateDynamic_Ccsl(Ccsl**, Dynamic*, NvU32, va_list);
NV_STATUS __nvoc_objCreate_Ccsl(Ccsl**, Dynamic*, NvU32);
#define __objCreate_Ccsl(ppNewObj, pParent, createFlags) \
__nvoc_objCreate_Ccsl((ppNewObj), staticCast((pParent), Dynamic), (createFlags))
NV_STATUS ccslContextInitViaChannel_IMPL(pCcslContext *ppCtx, NvHandle hClient, NvHandle hChannel);
#define ccslContextInitViaChannel(ppCtx, hClient, hChannel) ccslContextInitViaChannel_IMPL(ppCtx, hClient, hChannel)
#define ccslContextInitViaChannel_HAL(ppCtx, hClient, hChannel) ccslContextInitViaChannel(ppCtx, hClient, hChannel)
NV_STATUS ccslContextInitViaKeyId_KERNEL(struct ConfidentialCompute *pConfCompute, pCcslContext *ppCtx, NvU32 globalKeyId);
#define ccslContextInitViaKeyId(pConfCompute, ppCtx, globalKeyId) ccslContextInitViaKeyId_KERNEL(pConfCompute, ppCtx, globalKeyId)
#define ccslContextInitViaKeyId_HAL(pConfCompute, ppCtx, globalKeyId) ccslContextInitViaKeyId(pConfCompute, ppCtx, globalKeyId)
NV_STATUS ccslRotateIv_IMPL(pCcslContext ctx, NvU8 direction);
#define ccslRotateIv(ctx, direction) ccslRotateIv_IMPL(ctx, direction)
#define ccslRotateIv_HAL(ctx, direction) ccslRotateIv(ctx, direction)
NV_STATUS ccslEncryptWithIv_IMPL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, NvU8 *encryptIv, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, NvU8 *authTagBuffer);
#define ccslEncryptWithIv(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncryptWithIv_IMPL(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslEncryptWithIv_HAL(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncryptWithIv(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslEncrypt_KERNEL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, NvU8 *authTagBuffer);
#define ccslEncrypt(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncrypt_KERNEL(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslEncrypt_HAL(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncrypt(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslDecrypt_KERNEL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, const NvU8 *decryptIv, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, const NvU8 *authTagBuffer);
#define ccslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslDecrypt_KERNEL(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslDecrypt_HAL(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslSign_IMPL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, NvU8 *authTagBuffer);
#define ccslSign(ctx, bufferSize, inputBuffer, authTagBuffer) ccslSign_IMPL(ctx, bufferSize, inputBuffer, authTagBuffer)
#define ccslSign_HAL(ctx, bufferSize, inputBuffer, authTagBuffer) ccslSign(ctx, bufferSize, inputBuffer, authTagBuffer)
NV_STATUS ccslQueryMessagePool_IMPL(pCcslContext ctx, NvU8 direction, NvU64 *messageNum);
#define ccslQueryMessagePool(ctx, direction, messageNum) ccslQueryMessagePool_IMPL(ctx, direction, messageNum)
#define ccslQueryMessagePool_HAL(ctx, direction, messageNum) ccslQueryMessagePool(ctx, direction, messageNum)
NV_STATUS ccslIncrementIv_IMPL(pCcslContext pCtx, NvU8 direction, NvU64 increment, NvU8 *iv);
#define ccslIncrementIv(pCtx, direction, increment, iv) ccslIncrementIv_IMPL(pCtx, direction, increment, iv)
#define ccslIncrementIv_HAL(pCtx, direction, increment, iv) ccslIncrementIv(pCtx, direction, increment, iv)
void ccslContextClear_IMPL(pCcslContext ctx);
#define ccslContextClear(ctx) ccslContextClear_IMPL(ctx)
NV_STATUS ccslIncrementCounter_IMPL(pCcslContext pCtx, NvU8 *ctr, NvU64 increment);
#define ccslIncrementCounter(pCtx, ctr, increment) ccslIncrementCounter_IMPL(pCtx, ctr, increment)
#undef PRIVATE_FIELD
#ifndef NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#undef ccslIncrementCounter
NV_STATUS NVOC_PRIVATE_FUNCTION(ccslIncrementCounter)(pCcslContext pCtx, NvU8 *ctr, NvU64 increment);
#endif // NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#endif // CCSL_H
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _G_CCSL_NVOC_H_

View File

@ -17,7 +17,7 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_Object;
void __nvoc_init_CeUtils(CeUtils*);
void __nvoc_init_funcTable_CeUtils(CeUtils*);
NV_STATUS __nvoc_ctor_CeUtils(CeUtils*, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
NV_STATUS __nvoc_ctor_CeUtils(CeUtils*, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
void __nvoc_init_dataField_CeUtils(CeUtils*);
void __nvoc_dtor_CeUtils(CeUtils*);
extern const struct NVOC_EXPORT_INFO __nvoc_export_info_CeUtils;
@ -75,13 +75,13 @@ void __nvoc_init_dataField_CeUtils(CeUtils *pThis) {
}
NV_STATUS __nvoc_ctor_Object(Object* );
NV_STATUS __nvoc_ctor_CeUtils(CeUtils *pThis, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS __nvoc_ctor_CeUtils(CeUtils *pThis, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS status = NV_OK;
status = __nvoc_ctor_Object(&pThis->__nvoc_base_Object);
if (status != NV_OK) goto __nvoc_ctor_CeUtils_fail_Object;
__nvoc_init_dataField_CeUtils(pThis);
status = __nvoc_ceutilsConstruct(pThis, arg_pGpu, arg_pAllocParams);
status = __nvoc_ceutilsConstruct(pThis, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
if (status != NV_OK) goto __nvoc_ctor_CeUtils_fail__init;
goto __nvoc_ctor_CeUtils_exit; // Success
@ -109,7 +109,7 @@ void __nvoc_init_CeUtils(CeUtils *pThis) {
__nvoc_init_funcTable_CeUtils(pThis);
}
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS status;
Object *pParentObj;
CeUtils *pThis;
@ -135,7 +135,7 @@ NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 cre
}
__nvoc_init_CeUtils(pThis);
status = __nvoc_ctor_CeUtils(pThis, arg_pGpu, arg_pAllocParams);
status = __nvoc_ctor_CeUtils(pThis, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
if (status != NV_OK) goto __nvoc_objCreate_CeUtils_cleanup;
*ppThis = pThis;
@ -156,9 +156,10 @@ __nvoc_objCreate_CeUtils_cleanup:
NV_STATUS __nvoc_objCreateDynamic_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, va_list args) {
NV_STATUS status;
OBJGPU * arg_pGpu = va_arg(args, OBJGPU *);
KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance = va_arg(args, KERNEL_MIG_GPU_INSTANCE *);
NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams = va_arg(args, NV0050_ALLOCATION_PARAMETERS *);
status = __nvoc_objCreate_CeUtils(ppThis, pParent, createFlags, arg_pGpu, arg_pAllocParams);
status = __nvoc_objCreate_CeUtils(ppThis, pParent, createFlags, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
return status;
}

View File

@ -76,7 +76,6 @@ struct CeUtils {
NvHandle hDevice;
NvHandle hSubdevice;
OBJCHANNEL *pChannel;
KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
OBJGPU *pGpu;
struct KernelCE *pKCe;
NvBool bUseVasForCeCopy;
@ -109,47 +108,16 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_CeUtils;
NV_STATUS __nvoc_objCreateDynamic_CeUtils(CeUtils**, Dynamic*, NvU32, va_list);
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils**, Dynamic*, NvU32, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
#define __objCreate_CeUtils(ppNewObj, pParent, createFlags, arg_pGpu, arg_pAllocParams) \
__nvoc_objCreate_CeUtils((ppNewObj), staticCast((pParent), Dynamic), (createFlags), arg_pGpu, arg_pAllocParams)
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils**, Dynamic*, NvU32, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
#define __objCreate_CeUtils(ppNewObj, pParent, createFlags, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams) \
__nvoc_objCreate_CeUtils((ppNewObj), staticCast((pParent), Dynamic), (createFlags), arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams)
NV_STATUS ceutilsConstruct_IMPL(struct CeUtils *arg_pCeUtils, OBJGPU *arg_pGpu, NV0050_ALLOCATION_PARAMETERS *arg_pAllocParams);
NV_STATUS ceutilsConstruct_IMPL(struct CeUtils *arg_pCeUtils, OBJGPU *arg_pGpu, KERNEL_MIG_GPU_INSTANCE *arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS *arg_pAllocParams);
#define __nvoc_ceutilsConstruct(arg_pCeUtils, arg_pGpu, arg_pAllocParams) ceutilsConstruct_IMPL(arg_pCeUtils, arg_pGpu, arg_pAllocParams)
#define __nvoc_ceutilsConstruct(arg_pCeUtils, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams) ceutilsConstruct_IMPL(arg_pCeUtils, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams)
void ceutilsDestruct_IMPL(struct CeUtils *pCeUtils);
#define __nvoc_ceutilsDestruct(pCeUtils) ceutilsDestruct_IMPL(pCeUtils)
NV_STATUS ceutilsInitialize_IMPL(struct CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
#ifdef __nvoc_ce_utils_h_disabled
static inline NV_STATUS ceutilsInitialize(struct CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsInitialize(pCeUtils, pGpu, pAllocParams) ceutilsInitialize_IMPL(pCeUtils, pGpu, pAllocParams)
#endif //__nvoc_ce_utils_h_disabled
void ceutilsDeinit_IMPL(struct CeUtils *pCeUtils);
#ifdef __nvoc_ce_utils_h_disabled
static inline void ceutilsDeinit(struct CeUtils *pCeUtils) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsDeinit(pCeUtils) ceutilsDeinit_IMPL(pCeUtils)
#endif //__nvoc_ce_utils_h_disabled
void ceutilsRegisterGPUInstance_IMPL(struct CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance);
#ifdef __nvoc_ce_utils_h_disabled
static inline void ceutilsRegisterGPUInstance(struct CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsRegisterGPUInstance(pCeUtils, pKernelMIGGPUInstance) ceutilsRegisterGPUInstance_IMPL(pCeUtils, pKernelMIGGPUInstance)
#endif //__nvoc_ce_utils_h_disabled
NV_STATUS ceutilsMemset_IMPL(struct CeUtils *pCeUtils, CEUTILS_MEMSET_PARAMS *pParams);
#ifdef __nvoc_ce_utils_h_disabled

View File

@ -367,6 +367,17 @@ void __nvoc_init_dataField_OBJGPU(OBJGPU *pThis) {
}
pThis->bIsGspOwnedFaultBuffersEnabled = ((NvBool)(0 != 0));
// Hal field -- bEnableBar1SparseForFillPteMemUnmap
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x11f0fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 */
{
pThis->bEnableBar1SparseForFillPteMemUnmap = ((NvBool)(0 == 0));
}
// default
else
{
pThis->bEnableBar1SparseForFillPteMemUnmap = ((NvBool)(0 != 0));
}
}
NV_STATUS __nvoc_ctor_Object(Object* );

View File

@ -1139,6 +1139,7 @@ struct OBJGPU {
NvBool bStateUnloading;
NvBool bStateLoaded;
NvBool bFullyConstructed;
NvBool bBf3WarBug4040336Enabled;
NvBool bUnifiedMemorySpaceEnabled;
NvBool bSriovEnabled;
NvBool bWarBug200577889SriovHeavyEnabled;
@ -1186,6 +1187,7 @@ struct OBJGPU {
NvBool bRecheckSliSupportAtResume;
_GPU_SLI_PEER peer[2];
NvBool bIsGspOwnedFaultBuffersEnabled;
NvBool bEnableBar1SparseForFillPteMemUnmap;
_GPU_GC6_STATE gc6State;
};
@ -3284,6 +3286,10 @@ static inline NvBool gpuIsUnifiedMemorySpaceEnabled(struct OBJGPU *pGpu) {
return pGpu->bUnifiedMemorySpaceEnabled;
}
static inline NvBool gpuIsWarBug4040336Enabled(struct OBJGPU *pGpu) {
return pGpu->bBf3WarBug4040336Enabled;
}
static inline NvBool gpuIsSriovEnabled(struct OBJGPU *pGpu) {
return pGpu->bSriovEnabled;
}

View File

@ -473,6 +473,16 @@ static void __nvoc_init_funcTable_KernelBif_1(KernelBif *pThis, RmHalspecOwner *
pThis->__kbifPreOsGlobalErotGrantRequest__ = &kbifPreOsGlobalErotGrantRequest_56cd7a;
}
// Hal function -- kbifCacheVFInfo
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GH100;
}
else
{
pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_TU102;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBif_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelBif_engstateStateInitLocked;

View File

@ -115,6 +115,7 @@ struct KernelBif {
NV_STATUS (*__kbifGetPciConfigSpacePriMirror__)(struct OBJGPU *, struct KernelBif *, NvU32 *, NvU32 *);
NV_STATUS (*__kbifGetBusOptionsAddr__)(struct OBJGPU *, struct KernelBif *, BUS_OPTIONS, NvU32 *);
NV_STATUS (*__kbifPreOsGlobalErotGrantRequest__)(struct OBJGPU *, struct KernelBif *);
void (*__kbifCacheVFInfo__)(struct OBJGPU *, struct KernelBif *);
NV_STATUS (*__kbifStatePreLoad__)(POBJGPU, struct KernelBif *, NvU32);
NV_STATUS (*__kbifStatePostUnload__)(POBJGPU, struct KernelBif *, NvU32);
void (*__kbifStateDestroy__)(POBJGPU, struct KernelBif *);
@ -263,6 +264,8 @@ NV_STATUS __nvoc_objCreate_KernelBif(KernelBif**, Dynamic*, NvU32);
#define kbifGetBusOptionsAddr_HAL(pGpu, pKernelBif, options, addrReg) kbifGetBusOptionsAddr_DISPATCH(pGpu, pKernelBif, options, addrReg)
#define kbifPreOsGlobalErotGrantRequest(pGpu, pKernelBif) kbifPreOsGlobalErotGrantRequest_DISPATCH(pGpu, pKernelBif)
#define kbifPreOsGlobalErotGrantRequest_HAL(pGpu, pKernelBif) kbifPreOsGlobalErotGrantRequest_DISPATCH(pGpu, pKernelBif)
#define kbifCacheVFInfo(pGpu, pKernelBif) kbifCacheVFInfo_DISPATCH(pGpu, pKernelBif)
#define kbifCacheVFInfo_HAL(pGpu, pKernelBif) kbifCacheVFInfo_DISPATCH(pGpu, pKernelBif)
#define kbifStatePreLoad(pGpu, pEngstate, arg0) kbifStatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
#define kbifStatePostUnload(pGpu, pEngstate, arg0) kbifStatePostUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kbifStateDestroy(pGpu, pEngstate) kbifStateDestroy_DISPATCH(pGpu, pEngstate)
@ -606,6 +609,14 @@ static inline NV_STATUS kbifPreOsGlobalErotGrantRequest_DISPATCH(struct OBJGPU *
return pKernelBif->__kbifPreOsGlobalErotGrantRequest__(pGpu, pKernelBif);
}
void kbifCacheVFInfo_TU102(struct OBJGPU *pGpu, struct KernelBif *pKernelBif);
void kbifCacheVFInfo_GH100(struct OBJGPU *pGpu, struct KernelBif *pKernelBif);
static inline void kbifCacheVFInfo_DISPATCH(struct OBJGPU *pGpu, struct KernelBif *pKernelBif) {
pKernelBif->__kbifCacheVFInfo__(pGpu, pKernelBif);
}
static inline NV_STATUS kbifStatePreLoad_DISPATCH(POBJGPU pGpu, struct KernelBif *pEngstate, NvU32 arg0) {
return pEngstate->__kbifStatePreLoad__(pGpu, pEngstate, arg0);
}

View File

@ -312,6 +312,7 @@ struct KernelGsp {
struct MESSAGE_QUEUE_COLLECTION *pMQCollection;
struct OBJRPC *pRpc;
struct OBJRPC *pLocklessRpc;
char vbiosVersionStr[16];
KernelGspFlcnUcode *pFwsecUcode;
KernelGspFlcnUcode *pScrubberUcode;
KernelGspFlcnUcode *pBooterLoadUcode;
@ -1172,15 +1173,15 @@ static inline NV_STATUS kgspExecuteSequencerBuffer(struct OBJGPU *pGpu, struct K
#define kgspExecuteSequencerBuffer(pGpu, pKernelGsp, pRunCpuSeqParams) kgspExecuteSequencerBuffer_IMPL(pGpu, pKernelGsp, pRunCpuSeqParams)
#endif //__nvoc_kernel_gsp_h_disabled
NV_STATUS kgspParseFwsecUcodeFromVbiosImg_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode);
NV_STATUS kgspParseFwsecUcodeFromVbiosImg_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode, NvU64 *pVbiosVersionCombined);
#ifdef __nvoc_kernel_gsp_h_disabled
static inline NV_STATUS kgspParseFwsecUcodeFromVbiosImg(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode) {
static inline NV_STATUS kgspParseFwsecUcodeFromVbiosImg(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode, NvU64 *pVbiosVersionCombined) {
NV_ASSERT_FAILED_PRECOMP("KernelGsp was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_kernel_gsp_h_disabled
#define kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode) kgspParseFwsecUcodeFromVbiosImg_IMPL(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode)
#define kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode, pVbiosVersionCombined) kgspParseFwsecUcodeFromVbiosImg_IMPL(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode, pVbiosVersionCombined)
#endif //__nvoc_kernel_gsp_h_disabled
NV_STATUS kgspAllocateScrubberUcodeImage_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, KernelGspFlcnUcode **ppScrubberUcode);

View File

@ -100,8 +100,8 @@ static NV_STATUS __nvoc_thunk_KernelSec2_kflcnResetHw(struct OBJGPU *pGpu, struc
return ksec2ResetHw(pGpu, (struct KernelSec2 *)(((unsigned char *)pKernelSec2) - __nvoc_rtti_KernelSec2_KernelFalcon.offset));
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_ksec2StateLoad(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return engstateStateLoad(pGpu, (struct OBJENGSTATE *)(((unsigned char *)pEngstate) + __nvoc_rtti_KernelSec2_OBJENGSTATE.offset), arg0);
static NV_STATUS __nvoc_thunk_KernelSec2_engstateStateLoad(struct OBJGPU *pGpu, struct OBJENGSTATE *pKernelSec2, NvU32 arg0) {
return ksec2StateLoad(pGpu, (struct KernelSec2 *)(((unsigned char *)pKernelSec2) - __nvoc_rtti_KernelSec2_OBJENGSTATE.offset), arg0);
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_ksec2StateUnload(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
@ -252,6 +252,17 @@ static void __nvoc_init_funcTable_KernelSec2_1(KernelSec2 *pThis, RmHalspecOwner
// Hal function -- ksec2ResetHw
pThis->__ksec2ResetHw__ = &ksec2ResetHw_TU102;
// Hal function -- ksec2StateLoad
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__ksec2StateLoad__ = &ksec2StateLoad_GH100;
}
// default
else
{
pThis->__ksec2StateLoad__ = &ksec2StateLoad_56cd7a;
}
// Hal function -- ksec2ReadUcodeFuseVersion
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */
{
@ -300,7 +311,7 @@ static void __nvoc_init_funcTable_KernelSec2_1(KernelSec2 *pThis, RmHalspecOwner
pThis->__nvoc_base_KernelFalcon.__kflcnResetHw__ = &__nvoc_thunk_KernelSec2_kflcnResetHw;
pThis->__ksec2StateLoad__ = &__nvoc_thunk_OBJENGSTATE_ksec2StateLoad;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateLoad__ = &__nvoc_thunk_KernelSec2_engstateStateLoad;
pThis->__ksec2StateUnload__ = &__nvoc_thunk_OBJENGSTATE_ksec2StateUnload;

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -64,11 +64,11 @@ struct KernelSec2 {
NV_STATUS (*__ksec2ServiceNotificationInterrupt__)(struct OBJGPU *, struct KernelSec2 *, IntrServiceServiceNotificationInterruptArguments *);
void (*__ksec2ConfigureFalcon__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2ResetHw__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2StateLoad__)(struct OBJGPU *, struct KernelSec2 *, NvU32);
NvU32 (*__ksec2ReadUcodeFuseVersion__)(struct OBJGPU *, struct KernelSec2 *, NvU32);
const BINDATA_ARCHIVE *(*__ksec2GetBinArchiveBlUcode__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2GetGenericBlUcode__)(struct OBJGPU *, struct KernelSec2 *, const RM_FLCN_BL_DESC **, const NvU8 **);
const BINDATA_ARCHIVE *(*__ksec2GetBinArchiveSecurescrubUcode__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2StateLoad__)(POBJGPU, struct KernelSec2 *, NvU32);
NV_STATUS (*__ksec2StateUnload__)(POBJGPU, struct KernelSec2 *, NvU32);
NV_STATUS (*__ksec2StateInitLocked__)(POBJGPU, struct KernelSec2 *);
NV_STATUS (*__ksec2StatePreLoad__)(POBJGPU, struct KernelSec2 *, NvU32);
@ -127,6 +127,8 @@ NV_STATUS __nvoc_objCreate_KernelSec2(KernelSec2**, Dynamic*, NvU32);
#define ksec2ConfigureFalcon_HAL(pGpu, pKernelSec2) ksec2ConfigureFalcon_DISPATCH(pGpu, pKernelSec2)
#define ksec2ResetHw(pGpu, pKernelSec2) ksec2ResetHw_DISPATCH(pGpu, pKernelSec2)
#define ksec2ResetHw_HAL(pGpu, pKernelSec2) ksec2ResetHw_DISPATCH(pGpu, pKernelSec2)
#define ksec2StateLoad(pGpu, pKernelSec2, arg0) ksec2StateLoad_DISPATCH(pGpu, pKernelSec2, arg0)
#define ksec2StateLoad_HAL(pGpu, pKernelSec2, arg0) ksec2StateLoad_DISPATCH(pGpu, pKernelSec2, arg0)
#define ksec2ReadUcodeFuseVersion(pGpu, pKernelSec2, ucodeId) ksec2ReadUcodeFuseVersion_DISPATCH(pGpu, pKernelSec2, ucodeId)
#define ksec2ReadUcodeFuseVersion_HAL(pGpu, pKernelSec2, ucodeId) ksec2ReadUcodeFuseVersion_DISPATCH(pGpu, pKernelSec2, ucodeId)
#define ksec2GetBinArchiveBlUcode(pGpu, pKernelSec2) ksec2GetBinArchiveBlUcode_DISPATCH(pGpu, pKernelSec2)
@ -135,7 +137,6 @@ NV_STATUS __nvoc_objCreate_KernelSec2(KernelSec2**, Dynamic*, NvU32);
#define ksec2GetGenericBlUcode_HAL(pGpu, pKernelSec2, ppDesc, ppImg) ksec2GetGenericBlUcode_DISPATCH(pGpu, pKernelSec2, ppDesc, ppImg)
#define ksec2GetBinArchiveSecurescrubUcode(pGpu, pKernelSec2) ksec2GetBinArchiveSecurescrubUcode_DISPATCH(pGpu, pKernelSec2)
#define ksec2GetBinArchiveSecurescrubUcode_HAL(pGpu, pKernelSec2) ksec2GetBinArchiveSecurescrubUcode_DISPATCH(pGpu, pKernelSec2)
#define ksec2StateLoad(pGpu, pEngstate, arg0) ksec2StateLoad_DISPATCH(pGpu, pEngstate, arg0)
#define ksec2StateUnload(pGpu, pEngstate, arg0) ksec2StateUnload_DISPATCH(pGpu, pEngstate, arg0)
#define ksec2StateInitLocked(pGpu, pEngstate) ksec2StateInitLocked_DISPATCH(pGpu, pEngstate)
#define ksec2StatePreLoad(pGpu, pEngstate, arg0) ksec2StatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
@ -184,6 +185,16 @@ static inline NV_STATUS ksec2ResetHw_DISPATCH(struct OBJGPU *pGpu, struct Kernel
return pKernelSec2->__ksec2ResetHw__(pGpu, pKernelSec2);
}
NV_STATUS ksec2StateLoad_GH100(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0);
static inline NV_STATUS ksec2StateLoad_56cd7a(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0) {
return NV_OK;
}
static inline NV_STATUS ksec2StateLoad_DISPATCH(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0) {
return pKernelSec2->__ksec2StateLoad__(pGpu, pKernelSec2, arg0);
}
static inline NvU32 ksec2ReadUcodeFuseVersion_b2b553(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 ucodeId) {
return 0;
}
@ -224,10 +235,6 @@ static inline const BINDATA_ARCHIVE *ksec2GetBinArchiveSecurescrubUcode_DISPATCH
return pKernelSec2->__ksec2GetBinArchiveSecurescrubUcode__(pGpu, pKernelSec2);
}
static inline NV_STATUS ksec2StateLoad_DISPATCH(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return pEngstate->__ksec2StateLoad__(pGpu, pEngstate, arg0);
}
static inline NV_STATUS ksec2StateUnload_DISPATCH(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return pEngstate->__ksec2StateUnload__(pGpu, pEngstate, arg0);
}

View File

@ -326,9 +326,8 @@ kvgpumgrGetHostVgpuDeviceFromMdevUuid(NvU32 gpuPciId, const NvU8 *pMdevUuid,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice);
NV_STATUS
kvgpumgrGetHostVgpuDeviceFromVmId(NvU32 gpuPciId, VM_ID guestVmId,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice,
VM_ID_TYPE vmIdType);
kvgpumgrGetHostVgpuDeviceFromVgpuUuid(NvU32 gpuPciId, NvU8 *vgpuUuid,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice);
NV_STATUS
kvgpumgrGetCreatableVgpuTypes(struct OBJGPU *pGpu, struct KernelVgpuMgr *pKernelVgpuMgr, NvU32 pgpuIndex, NvU32* numVgpuTypes, NvU32* vgpuTypes);

View File

@ -233,6 +233,9 @@ typedef struct OBJCHANNEL
NvU64 pbGpuVA;
NvU64 pbGpuBitMapVA;
NvU64 pbGpuNotifierVA;
MEMORY_DESCRIPTOR *pUserdMemdesc;
MEMORY_DESCRIPTOR *pChannelBufferMemdesc;
MEMORY_DESCRIPTOR *pErrNotifierMemdesc;
NvU8 *pbCpuVA;
NvU8 *pbBitMapVA;
Nv906fControl *pControlGPFifo;
@ -2801,6 +2804,17 @@ static inline NV_STATUS memmgrInitSavedTopLevelScrubber(OBJGPU *arg0, struct Mem
#define memmgrInitSavedTopLevelScrubber(arg0, arg1) memmgrInitSavedTopLevelScrubber_IMPL(arg0, arg1)
#endif //__nvoc_mem_mgr_h_disabled
MEMORY_DESCRIPTOR *memmgrMemUtilsGetMemDescFromHandle_IMPL(struct MemoryManager *pMemoryManager, NvHandle hClient, NvHandle hMemory);
#ifdef __nvoc_mem_mgr_h_disabled
static inline MEMORY_DESCRIPTOR *memmgrMemUtilsGetMemDescFromHandle(struct MemoryManager *pMemoryManager, NvHandle hClient, NvHandle hMemory) {
NV_ASSERT_FAILED_PRECOMP("MemoryManager was disabled!");
return NULL;
}
#else //__nvoc_mem_mgr_h_disabled
#define memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, hClient, hMemory) memmgrMemUtilsGetMemDescFromHandle_IMPL(pMemoryManager, hClient, hMemory)
#endif //__nvoc_mem_mgr_h_disabled
NV_STATUS memmgrVerifyGspDmaOps_IMPL(OBJGPU *arg0, struct MemoryManager *arg1);
#ifdef __nvoc_mem_mgr_h_disabled

View File

@ -814,6 +814,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B5, 0x1642, 0x10de, "NVIDIA A100 80GB PCIe" },
{ 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
{ 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
{ 0x20B7, 0x1804, 0x10de, "NVIDIA A30" },
{ 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
{ 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179c, 0x10de, "NVIDIA A800-SXM4-80GB" },
@ -1743,6 +1744,20 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2238, 0x16b8, 0x10DE, "NVIDIA A10M-10C" },
{ 0x2238, 0x16b9, 0x10DE, "NVIDIA A10M-20C" },
{ 0x2238, 0x16e6, 0x10DE, "NVIDIA A10M-1" },
{ 0x2321, 0x1853, 0x10DE, "NVIDIA H100L-1-12CME" },
{ 0x2321, 0x1854, 0x10DE, "NVIDIA H100L-1-12C" },
{ 0x2321, 0x1855, 0x10DE, "NVIDIA H100L-1-24C" },
{ 0x2321, 0x1856, 0x10DE, "NVIDIA H100L-2-24C" },
{ 0x2321, 0x1857, 0x10DE, "NVIDIA H100L-3-47C" },
{ 0x2321, 0x1858, 0x10DE, "NVIDIA H100L-4-47C" },
{ 0x2321, 0x1859, 0x10DE, "NVIDIA H100L-7-94C" },
{ 0x2321, 0x185a, 0x10DE, "NVIDIA H100L-4C" },
{ 0x2321, 0x185b, 0x10DE, "NVIDIA H100L-6C" },
{ 0x2321, 0x185c, 0x10DE, "NVIDIA H100L-11C" },
{ 0x2321, 0x185d, 0x10DE, "NVIDIA H100L-15C" },
{ 0x2321, 0x185e, 0x10DE, "NVIDIA H100L-23C" },
{ 0x2321, 0x185f, 0x10DE, "NVIDIA H100L-47C" },
{ 0x2321, 0x1860, 0x10DE, "NVIDIA H100L-94C" },
{ 0x2322, 0x17e2, 0x10DE, "NVIDIA H800-1-10CME" },
{ 0x2322, 0x17e3, 0x10DE, "NVIDIA H800-1-10C" },
{ 0x2322, 0x17e4, 0x10DE, "NVIDIA H800-2-20C" },
@ -1773,6 +1788,20 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2331, 0x1798, 0x10DE, "NVIDIA H100-5C" },
{ 0x2331, 0x17f0, 0x10DE, "NVIDIA H100-1-10CME" },
{ 0x2331, 0x1844, 0x10DE, "NVIDIA H100-1-20C" },
{ 0x233A, 0x1861, 0x10DE, "NVIDIA H800L-1-12CME" },
{ 0x233A, 0x1862, 0x10DE, "NVIDIA H800L-1-12C" },
{ 0x233A, 0x1863, 0x10DE, "NVIDIA H800L-1-24C" },
{ 0x233A, 0x1864, 0x10DE, "NVIDIA H800L-2-24C" },
{ 0x233A, 0x1865, 0x10DE, "NVIDIA H800L-3-47C" },
{ 0x233A, 0x1866, 0x10DE, "NVIDIA H800L-4-47C" },
{ 0x233A, 0x1867, 0x10DE, "NVIDIA H800L-7-94C" },
{ 0x233A, 0x1868, 0x10DE, "NVIDIA H800L-4C" },
{ 0x233A, 0x1869, 0x10DE, "NVIDIA H800L-6C" },
{ 0x233A, 0x186a, 0x10DE, "NVIDIA H800L-11C" },
{ 0x233A, 0x186b, 0x10DE, "NVIDIA H800L-15C" },
{ 0x233A, 0x186c, 0x10DE, "NVIDIA H800L-23C" },
{ 0x233A, 0x186d, 0x10DE, "NVIDIA H800L-47C" },
{ 0x233A, 0x186e, 0x10DE, "NVIDIA H800L-94C" },
{ 0x25B6, 0x159d, 0x10DE, "NVIDIA A16-1B" },
{ 0x25B6, 0x159e, 0x10DE, "NVIDIA A16-2B" },
{ 0x25B6, 0x159f, 0x10DE, "NVIDIA A16-1Q" },

View File

@ -1213,6 +1213,8 @@ NV_STATUS osVerifySystemEnvironment(OBJGPU *pGpu);
NV_STATUS osSanityTestIsr(OBJGPU *pGpu);
void osAllocatedRmClient(void* pOSInfo);
NV_STATUS osConfigurePcieReqAtomics(OS_GPU_INFO *pOsGpuInfo, NvU32 *pMask);
NvBool osDmabufIsSupported(void);

File diff suppressed because it is too large

View File

@ -2402,6 +2402,40 @@ static void rpc_iGrp_ipVersions_Install_v23_03(IGRP_IP_VERSIONS_TABLE_INFO *pInf
#endif //
}
// No enabled chips use this variant provider
static void rpc_iGrp_ipVersions_Install_v23_04(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
{
#if 0
POBJGPU pGpu = pInfo->pGpu;
OBJRPC *pRpc = (OBJRPC *) pInfo->pDynamic;
RPC_HAL_IFACES *pRpcHal = &pRpc->_hal;
// avoid possible unused warnings
pGpu += 0;
pRpcHal += 0;
#endif //
}
// No enabled chips use this variant provider
static void rpc_iGrp_ipVersions_Install_v23_05(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
{
#if 0
POBJGPU pGpu = pInfo->pGpu;
OBJRPC *pRpc = (OBJRPC *) pInfo->pDynamic;
RPC_HAL_IFACES *pRpcHal = &pRpc->_hal;
// avoid possible unused warnings
pGpu += 0;
pRpcHal += 0;
#endif //
}
@ -2920,6 +2954,12 @@ static NV_STATUS rpc_iGrp_ipVersions_getInfo(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_03[] = {
{ 0x23030000, 0xFFFFFFFF, }, //
};
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_04[] = {
{ 0x23040000, 0xFFFFFFFF, }, //
};
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_05[] = {
{ 0x23050000, 0xFFFFFFFF, }, //
};
#define _RPC_HAL_IGRP_ENTRY_INIT(v) \
{ RPC_IGRP_IP_VERSIONS_RANGES_##v, NV_ARRAY_ELEMENTS(RPC_IGRP_IP_VERSIONS_RANGES_##v), rpc_iGrp_ipVersions_Install_##v, }
@ -3059,6 +3099,8 @@ static NV_STATUS rpc_iGrp_ipVersions_getInfo(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
_RPC_HAL_IGRP_ENTRY_INIT(v22_01), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_02), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_03), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_04), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_05), //
};
#undef _RPC_HAL_IGRP_ENTRY_INIT

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -322,6 +322,16 @@ typedef struct SYS_STATIC_CONFIG
NvBool bOsSevEnabled;
} SYS_STATIC_CONFIG;
typedef enum
{
CPU_VENDOR_UNKNOWN = 0,
CPU_VENDOR_INTEL,
CPU_VENDOR_AMD,
CPU_VENDOR_WINCHIP,
CPU_VENDOR_CYRIX,
CPU_VENDOR_TRANSM
} CPU_VENDOR;
typedef struct
{
NvBool bInitialized; // Set to true once we id the CPU
@ -340,6 +350,7 @@ typedef struct
// filled in if CPU has embedded name
NvU32 family; // Vendor defined Family/extended Family
NvU32 model; // Vendor defined Model/extended Model
NvU8 vendor; // Vendor CPU_VENDOR
NvU32 coresOnDie; // # of cores on the die (0 if unknown)
NvU32 platformID; // Chip package type
NvU8 stepping; // Silicon stepping

View File

@ -229,7 +229,7 @@
#define RMCFG_FEATURE_PLATFORM_GSP 0 // Running as part of GSP Firmware
#define RMCFG_FEATURE_PLATFORM_MODS_WINDOWS 0 // Running as part of MODS on Windows
#define RMCFG_FEATURE_PLATFORM_MODS_UNIX 0 // Running as part of MODS on UNIX
#define RMCFG_FEATURE_PLATFORM_VMWARE 0 // Running on VMware
#define RMCFG_FEATURE_PLATFORM_UNIX_VMWARE 0 // Running on VMware
#define RMCFG_FEATURE_ARCH_UNKNOWN 0 // unknown arch
#define RMCFG_FEATURE_ARCH_X86 0 // Intel x86, 32bit
#define RMCFG_FEATURE_ARCH_X64 0 // Intel 64bit

View File

@ -1,224 +1,3 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CCSL_H
#define CCSL_H
#include "g_ccsl_nvoc.h"
#include "nvstatus.h"
#include "nvmisc.h"
#include "kernel/gpu/conf_compute/conf_compute.h"
typedef struct ccslContext_t *pCcslContext;
/*
* Initializes a context by providing client and channel information.
*
* ccslContext [in / out]
* hClient [in]
* hChannel [in]
*/
NV_STATUS
ccslContextInitViaChannel
(
pCcslContext *ppCtx,
NvHandle hClient,
NvHandle hChannel
);
/*
* Initializes a context by providing key ID information.
*
* ConfidentialCompute [in]
* ccslContext [in / out]
* globalKeyId [in]
*/
NV_STATUS
ccslContextInitViaKeyId
(
ConfidentialCompute *pConfCompute,
pCcslContext *ppCtx,
NvU32 globalKeyId
);
/*
* Clears the context and erases sensitive material such as keys.
*
* ccslContext [in / out]
*/
void
ccslContextClear
(
pCcslContext ctx
);
/* To be called before library client triggers a Device-side encryption.
* Attempts to increment the library's Device-side message counter and returns an error if it will overflow.
*
* ccslContext [in]
* decryptIv [in]
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if the next Device-side encryption will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslLogDeviceEncryption
(
pCcslContext ctx,
NvU8 *decryptIv
);
/* Request the next IV to be used in encryption. Storing it explicitly enables the caller
* to perform encryption out of order using EncryptWithIv
*
* ccslContext [in / out]
* encryptIv [out]
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if the next encryption will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslAcquireEncryptionIv
(
pCcslContext ctx,
NvU8 *encryptIv
);
/* Rotate the IV for the given direction.
*
* ccslContext [in / out]
* direction [in]
*/
NV_STATUS
ccslRotateIv
(
pCcslContext ctx,
NvU8 direction
);
/*
* Encrypt and sign data using provided IV
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be encrypted in units of bytes.
* inputBuffer [in] - Address of plaintext input buffer. For performance it should be 16-byte aligned.
* encryptionIv [in/out] - IV to use for encryption. The IV will be "dirtied" after this operation.
* outputBuffer [in/out] - Address of ciphertext output buffer.
* authTagBuffer [in/out] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_OK.
*/
NV_STATUS
ccslEncryptWithIv
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *encryptIv,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
);
/*
* If message counter will not overflow then encrypt and sign data.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be encrypted in units of bytes.
* inputBuffer [in] - Address of plaintext input buffer. For performance it should be 16-byte aligned.
* outputBuffer [in/out] - Address of ciphertext output buffer.
* authTagBuffer [in/out] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if message counter will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslEncrypt
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
);
/*
* First verify authentication tag. If authentication passes then the data is decrypted.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be decrypted in units of bytes.
* inputBuffer [in] - Address of ciphertext input buffer. For performance it should be 16-byte aligned.
* outputBuffer [in/out] - Address of plaintext output buffer.
* authTagBuffer [in] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_ERR_INVALID_DATA if verification of the authentication tag fails.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslDecrypt
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *authTagBuffer
);
/*
* Sign the plaintext message.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be signed in units of bytes.
* inputBuffer [in] - Address of input buffer. For performance it should be 16-byte aligned.
* authTagBuffer [in/out] - Address of authentication tag. In HCC it is 32 bytes.
*
* Returns NV_OK
*/
NV_STATUS
ccslSign
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer
);
#define CCSL_DIR_HOST_TO_DEVICE 0
#define CCSL_DIR_DEVICE_TO_HOST 1
/*
* Returns the number of messages that can be encrypted by the CPU (CCSL_DIR_HOST_TO_DEVICE)
* or encrypted by the GPU (CCSL_DIR_DEVICE_TO_HOST) before the message counter will overflow.
*
* ccslContext [in]
* direction [in] - Either CCSL_DIR_HOST_TO_DEVICE or CCSL_DIR_DEVICE_TO_HOST.
* messageNum [out] - Number of messages that can be encrypted before overflow.
*/
NV_STATUS
ccslQueryMessagePool
(
pCcslContext ctx,
NvU8 direction,
NvU64 *messageNum
);
#endif // CCSL_H
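The interface documented above (as it stood before this change) pairs a per-channel context with counter-mode AES-GCM helpers. The sketch below is a hypothetical caller written against only the prototypes and doc comments in this header; `ccslUsageSketch`, its parameters, and the inbound-message fields are illustrative placeholders rather than driver code, and error handling is abbreviated.

```c
#include "kernel/gpu/conf_compute/ccsl.h"

/* Hypothetical usage of the pre-change CCSL interface; not driver code. */
static NV_STATUS
ccslUsageSketch(NvHandle hClient, NvHandle hChannel,
                const NvU8 *inbound, NvU32 inboundSize,
                const NvU8 *inboundIv, const NvU8 *inboundTag)
{
    pCcslContext ctx = NULL;
    NvU8  outPlain[64]  = { 0 };
    NvU8  outCipher[64] = { 0 };
    NvU8  outTag[32]    = { 0 };   // 16 bytes in HCC, 32 bytes in APM
    NvU8  inPlain[256];
    NvU64 remaining = 0;
    NV_STATUS status;

    status = ccslContextInitViaChannel(&ctx, hClient, hChannel);
    if (status != NV_OK)
        return status;

    // How many more host-to-device messages fit before the counter overflows?
    status = ccslQueryMessagePool(ctx, CCSL_DIR_HOST_TO_DEVICE, &remaining);
    if (status != NV_OK)
        goto done;

    // Encrypt and sign an outbound (CPU-encrypted) message.
    status = ccslEncrypt(ctx, sizeof(outPlain), outPlain, outCipher, outTag);
    if (status != NV_OK)
        goto done;

    // Authenticate and decrypt an inbound (GPU-encrypted) message.
    if (inboundSize <= sizeof(inPlain))
    {
        status = ccslDecrypt(ctx, inboundSize, inbound, inboundIv,
                             inPlain, inboundTag);
    }

done:
    ccslContextClear(ctx);   // erases keys and other sensitive material
    return status;
}
```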

View File

@ -42,6 +42,19 @@
#include "platform/chipset/chipset.h" // BUSINFO
#include "gpu/nvbitmask.h" // NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX
// VF related info for GSP-RM
typedef struct GSP_VF_INFO
{
NvU32 totalVFs;
NvU32 firstVFOffset;
NvU64 FirstVFBar0Address;
NvU64 FirstVFBar1Address;
NvU64 FirstVFBar2Address;
NvBool b64bitBar0;
NvBool b64bitBar1;
NvBool b64bitBar2;
} GSP_VF_INFO;
typedef struct GspSMInfo_t
{
NvU32 version;
@ -163,6 +176,7 @@ typedef struct GspSystemInfo
NvU32 hypervisorType;
NvBool bIsPassthru;
NvU64 sysTimerOffsetNs;
GSP_VF_INFO gspVFInfo;
} GspSystemInfo;

View File

@ -57,14 +57,10 @@ typedef struct
NVOC_PREFIX(ceutils) class CeUtils : Object
{
public:
NV_STATUS ceutilsConstruct(CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
NV_STATUS ceutilsConstruct(CeUtils *pCeUtils, OBJGPU *pGpu, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance,
NV0050_ALLOCATION_PARAMETERS *pAllocParams);
void ceutilsDestruct(CeUtils *pCeUtils);
NV_STATUS ceutilsInitialize(CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
void ceutilsDeinit(CeUtils *pCeUtils);
void ceutilsRegisterGPUInstance(CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance);
NV_STATUS ceutilsMemset(CeUtils *pCeUtils, CEUTILS_MEMSET_PARAMS *pParams);
NV_STATUS ceutilsMemcopy(CeUtils *pCeUtils, CEUTILS_MEMCOPY_PARAMS *pParams);
@ -80,7 +76,6 @@ public:
NvHandle hSubdevice;
OBJCHANNEL *pChannel;
KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
OBJGPU *pGpu;
KernelCE *pKCe;

View File

@ -107,17 +107,8 @@
NV_PUSH_DATA(d4); \
} while (0)
#define READ_CHANNEL_PAYLOAD_SEMA(channel) MEM_RD32((NvU8*)channel->pbCpuVA + \
channel->finishPayloadOffset)
#define READ_CHANNEL_PB_SEMA(channel) MEM_RD32((NvU8*)channel->pbCpuVA + \
channel->semaOffset)
#define WRITE_CHANNEL_PB_SEMA(channel, val) MEM_WR32((NvU8*)channel->pbCpuVA + \
channel->semaOffset, val);
#define WRITE_CHANNEL_PAYLOAD_SEMA(channel,val) MEM_WR32((NvU8*)channel->pbCpuVA + \
channel->finishPayloadOffset, val);
#define READ_CHANNEL_PAYLOAD_SEMA(channel) channelReadChannelMemdesc(channel, channel->finishPayloadOffset)
#define READ_CHANNEL_PB_SEMA(channel) channelReadChannelMemdesc(channel, channel->semaOffset)
//
// This struct contains parameters needed to send a pushbuffer for a CE
@ -141,6 +132,7 @@ typedef struct
NV_STATUS channelSetupIDs(OBJCHANNEL *pChannel, OBJGPU *pGpu, NvBool bUseVasForCeCopy, NvBool bMIGInUse);
void channelSetupChannelBufferSizes(OBJCHANNEL *pChannel);
NvU32 channelReadChannelMemdesc(OBJCHANNEL *pChannel, NvU32 offset);
// Needed for pushbuffer management
NV_STATUS channelWaitForFreeEntry(OBJCHANNEL *pChannel, NvU32 *pPutIndex);

View File

@ -87,7 +87,7 @@ typedef struct OBJMEMSCRUB {
PSCRUB_NODE pScrubList;
#if !defined(SRT_BUILD)
// Scrubber uses ceUtils to manage CE channel
CeUtils ceUtilsObject;
CeUtils *pCeUtils;
#endif
struct OBJGPU *pGpu;
VGPU_GUEST_PMA_SCRUB_BUFFER_RING vgpuScrubBuffRing;

View File

@ -46,7 +46,7 @@
__spdmStatus = (expr); \
if (LIBSPDM_STATUS_IS_ERROR(__spdmStatus)) \
{ \
NV_PRINTF(LEVEL_INFO, "SPDM failed with status 0x%0x\n", \
NV_PRINTF(LEVEL_ERROR, "SPDM failed with status 0x%0x\n", \
__spdmStatus); \
status = NV_ERR_GENERIC; \
goto ErrorExit; \

View File

@ -284,10 +284,6 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslLogDeviceEncryption(struct ccslContext_t *ctx,
NvU8 *decryptIv);
NV_STATUS nvGpuOpsCcslAcquireEncryptionIv(struct ccslContext_t *ctx,
NvU8 *encryptIv);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@ -306,6 +302,8 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NvU32 bufferSize,
@ -314,5 +312,9 @@ NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 *messageNum);
NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
#endif /* _NV_GPU_OPS_H_*/

View File

@ -286,6 +286,7 @@ typedef struct UvmGpuChannelInfo_tag
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@ -1031,10 +1032,10 @@ typedef struct UvmCslIv
NvU8 fresh;
} UvmCslIv;
typedef enum UvmCslDirection
typedef enum UvmCslOperation
{
UVM_CSL_DIR_CPU_TO_GPU,
UVM_CSL_DIR_GPU_TO_CPU
} UvmCslDirection;
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
#endif // _NV_UVM_TYPES_H_

View File

@ -1915,4 +1915,15 @@
#define NV_REG_STR_RM_GSP_OWNED_FAULT_BUFFERS_ENABLE_NO 0x00000000
#define NV_REG_STR_RM_GSP_OWNED_FAULT_BUFFERS_ENABLE_YES 0x00000001
//
// WAR for BlueField3: Bug 4040336
// BF3's PCI MMIO bus address 0x800000000000 is too high for Ampere to address.
// Because of this, BF3's bus address has been moved below 4 GB, so the CPU PA is no
// longer the same as the bus address; this regkey adjusts the CPU PA passed in to the
// correct bus address.
//
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3 "RmDmaAdjustPeerMmioBF3"
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_DISABLE 0
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_ENABLE 1
#endif // NVRM_REGISTRY_H

View File

@ -106,6 +106,7 @@ typedef struct vmiopd_SM_info {
#define NV2080_CTRL_NVLINK_MAX_LINKS_v15_02 6
#define NV2080_CTRL_NVLINK_MAX_LINKS_v1A_18 12
#define NV2080_CTRL_NVLINK_MAX_LINKS_v23_04 24
#define NV0000_CTRL_P2P_CAPS_INDEX_TABLE_SIZE_v15_02 8
#define NV0000_CTRL_P2P_CAPS_INDEX_TABLE_SIZE_v1F_0D 9

View File

@ -31,7 +31,7 @@
#define RPC_VERSION_FROM_VGX_VERSION(major, minor) ( DRF_NUM(_RPC, _VERSION_NUMBER, _MAJOR, major) | \
DRF_NUM(_RPC, _VERSION_NUMBER, _MINOR, minor))
#define VGX_MAJOR_VERSION_NUMBER 0x23
#define VGX_MINOR_VERSION_NUMBER 0x03
#define VGX_MINOR_VERSION_NUMBER 0x05
#define VGX_MAJOR_VERSION_NUMBER_VGPU_12_0 0x1A
#define VGX_MINOR_VERSION_NUMBER_VGPU_12_0 0x18
@ -49,7 +49,7 @@
* 2. This is the first break in migration compatibility after a release.
*/
#define NV_VGPU_GRIDSW_INTERNAL_TO_EXTERNAL_VERSION_MAPPING \
{{0x23, 0x0}, {0x23, 0x03}, {0x12, 0x1}}, \
{{0x23, 0x0}, {0x23, 0x05}, {0x12, 0x1}}, \
{{0x22, 0x0}, {0x22, 0x02}, {0x11, 0x1}}, \
{{0x21, 0x0}, {0x21, 0x0C}, {0x10, 0x1}}, \
{{0x20, 0x0}, {0x20, 0x04}, {0xF, 0x1}}, \

View File

@ -29,7 +29,7 @@
//******************************************************************************
// FIXME XXX
#define NVOC_KERNEL_GRAPHICS_CONTEXT_H_PRIVATE_ACCESS_ALLOWED
#define NVOC_KERNEL_GRAPHICS_CONTEXT_H_PRIVATE_ACCESS_ALLOWED
#include "os/os.h"
#include "core/system.h"
@ -677,17 +677,17 @@ NV_STATUS RmRpcSetGuestSystemInfo(OBJGPU *pGpu, OBJRPC *pRpc)
{
if (rpcVgxVersion.majorNum != 0)
{
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) && !bSkipRpcVersionHandshake)
{
bSkipRpcVersionHandshake = NV_TRUE;
}
else
{
NV_PRINTF(LEVEL_INFO,
"NVRM_RPC: Skipping RPC version handshake for instance 0x%x\n",
gpuGetInstance(pGpu));
goto skip_ver_handshake;
}
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) && !bSkipRpcVersionHandshake)
{
bSkipRpcVersionHandshake = NV_TRUE;
}
else
{
NV_PRINTF(LEVEL_INFO,
"NVRM_RPC: Skipping RPC version handshake for instance 0x%x\n",
gpuGetInstance(pGpu));
goto skip_ver_handshake;
}
}
else
{
@ -1325,6 +1325,16 @@ NV_STATUS rpcGspSetSystemInfo_v17_00
rpcInfo->hypervisorType = hypervisorGetHypervisorType(pHypervisor);
rpcInfo->bIsPassthru = pGpu->bIsPassthru;
// Fill in VF related GPU flags
rpcInfo->gspVFInfo.totalVFs = pGpu->sriovState.totalVFs;
rpcInfo->gspVFInfo.firstVFOffset = pGpu->sriovState.firstVFOffset;
rpcInfo->gspVFInfo.FirstVFBar0Address = pGpu->sriovState.firstVFBarAddress[0];
rpcInfo->gspVFInfo.FirstVFBar1Address = pGpu->sriovState.firstVFBarAddress[1];
rpcInfo->gspVFInfo.FirstVFBar2Address = pGpu->sriovState.firstVFBarAddress[2];
rpcInfo->gspVFInfo.b64bitBar0 = pGpu->sriovState.b64bitVFBar0;
rpcInfo->gspVFInfo.b64bitBar1 = pGpu->sriovState.b64bitVFBar1;
rpcInfo->gspVFInfo.b64bitBar2 = pGpu->sriovState.b64bitVFBar2;
OBJTMR *pTmr = GPU_GET_TIMER(pGpu);
rpcInfo->sysTimerOffsetNs = pTmr->sysTimerOffsetNs;

View File

@ -81,6 +81,8 @@ NVLOG_LOGGER NvLogLogger =
NV_STATUS
nvlogInit(void *pData)
{
NV_STATUS status = NV_OK;
nvlogRegRoot = pData;
portInitialize();
NvLogLogger.mainLock = portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged());
@ -88,8 +90,13 @@ nvlogInit(void *pData)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
NvLogLogger.buffersLock = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged());
if (NvLogLogger.buffersLock == NULL)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
tlsInitialize();
return NV_OK;
return status;
}
void nvlogUpdate(void) {
@ -98,22 +105,30 @@ void nvlogUpdate(void) {
NV_STATUS
nvlogDestroy(void)
{
NV_STATUS status = NV_OK;
NvU32 i;
tlsShutdown();
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
nvlogDeallocBuffer(i, NV_TRUE);
}
if (NvLogLogger.mainLock != NULL)
{
portSyncSpinlockDestroy(NvLogLogger.mainLock);
NvLogLogger.mainLock = NULL;
}
if (NvLogLogger.buffersLock != NULL)
{
portSyncMutexDestroy(NvLogLogger.buffersLock);
NvLogLogger.buffersLock = NULL;
}
tlsShutdown();
/// @todo Destructor should return void.
portShutdown();
return NV_OK;
return status;
}
static NV_STATUS
@ -228,6 +243,7 @@ nvlogAllocBuffer
return status;
}
portSyncMutexAcquire(NvLogLogger.buffersLock);
portSyncSpinlockAcquire(NvLogLogger.mainLock);
if (NvLogLogger.nextFree < NVLOG_MAX_BUFFERS)
@ -249,6 +265,7 @@ nvlogAllocBuffer
else break;
}
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);
if (status != NV_OK)
{
@ -282,11 +299,13 @@ nvlogDeallocBuffer
_YES, pBuffer->flags);
while (pBuffer->threadCount > 0) { /*spin*/ }
portSyncMutexAcquire(NvLogLogger.buffersLock);
portSyncSpinlockAcquire(NvLogLogger.mainLock);
NvLogLogger.pBuffers[hBuffer] = NULL;
NvLogLogger.nextFree = NV_MIN(hBuffer, NvLogLogger.nextFree);
NvLogLogger.totalFree++;
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);
_deallocateNvlogBuffer(pBuffer);
}

View File

@ -732,3 +732,141 @@ kbifGetBusOptionsAddr_GH100
return status;
}
/*!
* @brief: Get BAR information from PCIe config space
*
* @param[in] pGpu OBJGPU pointer
* @param[in] barRegCSBase The base register 0 address
* @param[in] barIndex The BAR index to check
* @param[out] pBarBaseAddress The start address of the specified BAR
* @param[out] pIs64BitBar To indicate if the BAR is using 64bit address
*
* @returns NV_STATUS
*/
static NV_STATUS
_kbifGetBarInfo_GH100
(
OBJGPU *pGpu,
NvU32 barRegCSBase,
NvU32 barIndex,
NvU64 *pBarBaseAddress,
NvBool *pIs64BitBar
)
{
NV_STATUS status = NV_OK;
NvBool barIs64Bit = NV_FALSE;
NvU32 barAddrLow = 0;
NvU32 barAddrHigh = 0;
NvU32 barRegCSLimit = barRegCSBase + NV_EP_PCFG_GPU_BARREG5 - NV_EP_PCFG_GPU_BARREG0;
NvU32 barRegCSOffset = barRegCSBase;
NvU64 barBaseAddr = 0;
NvU32 i = 0;
for (i = 0; i <= barIndex; i++)
{
if ((status = GPU_BUS_CFG_CYCLE_RD32(pGpu, barRegCSOffset, &barAddrLow)) != NV_OK)
{
return status;
}
//
// The SPACE_TYPE, ADDRESS_TYPE, PREFETCHABLE and BASE_ADDRESS fields
// have the same definition as for Base Address Register 0
//
barIs64Bit = FLD_TEST_DRF(_EP_PCFG_GPU, _BARREG0, _REG_ADDR_TYPE, _64BIT, barAddrLow);
if (i != barIndex)
{
barRegCSOffset += (barIs64Bit ? 8 : 4);
if (barRegCSOffset >= barRegCSLimit)
{
return NV_ERR_INVALID_INDEX;
}
}
}
if (pBarBaseAddress != NULL)
{
// Get the BAR address
barBaseAddr = barAddrLow & 0xFFFFFFF0;
if (barIs64Bit)
{
// Read and save the bar high address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, barRegCSOffset + 4, &barAddrHigh);
NV_ASSERT_OR_RETURN((status == NV_OK), status);
barBaseAddr |= (NvU64)barAddrHigh << 32;
}
*pBarBaseAddress = barBaseAddr;
}
if (pIs64BitBar != NULL)
{
*pIs64BitBar = barIs64Bit;
}
return NV_OK;
}
/*! @brief Fetch VF details such as the number of VFs, first VF offset, etc.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelBif Kernel BIF object pointer
*/
void
kbifCacheVFInfo_GH100
(
OBJGPU *pGpu,
KernelBif *pKernelBif
)
{
NV_STATUS status = NV_OK;
NvU32 regVal = 0;
NvU64 barAddr = 0;
NvBool barIs64Bit = NV_FALSE;
// Get total VF count
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF, &regVal);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF\n");
return;
}
pGpu->sriovState.totalVFs = GPU_DRF_VAL(_EP_PCFG_GPU, _SRIOV_INIT_TOT_VF,
_TOTAL_VFS, regVal);
// Get first VF offset
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE, &regVal);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE\n");
return;
}
pGpu->sriovState.firstVFOffset = GPU_DRF_VAL(_EP_PCFG_GPU, _SRIOV_FIRST_VF_STRIDE,
_FIRST_VF_OFFSET, regVal);
// Get VF BAR0 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 0, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[0] = barAddr;
pGpu->sriovState.b64bitVFBar0 = barIs64Bit;
// Get VF BAR1 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 1, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[1] = barAddr;
pGpu->sriovState.b64bitVFBar1 = barIs64Bit;
// Get VF BAR2 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 2, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[2] = barAddr;
pGpu->sriovState.b64bitVFBar2 = barIs64Bit;
}
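For reference, the address assembly that `_kbifGetBarInfo_GH100` performs on the two config-space dwords can be reproduced in isolation. The snippet below is an illustrative, standalone sketch using plain C types and hypothetical register values, not driver code: bits [3:0] of the low dword carry the space/type/prefetch flags and are masked off, and for a 64-bit BAR the following dword supplies address bits 63:32.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Assemble a BAR base address the way _kbifGetBarInfo_GH100 does above. */
static uint64_t assembleBarBase(uint32_t barAddrLow, uint32_t barAddrHigh, bool is64Bit)
{
    uint64_t base = barAddrLow & 0xFFFFFFF0u;   /* drop type/prefetch flag bits */

    if (is64Bit)
        base |= (uint64_t)barAddrHigh << 32;    /* upper dword of a 64-bit BAR  */

    return base;
}

int main(void)
{
    /* Hypothetical 64-bit, non-prefetchable BAR: low-dword flag bits = 0x4. */
    assert(assembleBarBase(0xE0000004u, 0x00000040u, true) == 0x40E0000000ull);

    /* A 32-bit BAR ignores the high dword entirely. */
    assert(assembleBarBase(0xFD000000u, 0xDEADBEEFu, false) == 0xFD000000ull);
    return 0;
}
```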

View File

@ -24,6 +24,7 @@
/* ------------------------- System Includes -------------------------------- */
#include "gpu/bif/kernel_bif.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/gpu.h"
#define NV_VGPU_EMU 0x0000FFFF:0x0000F000 /* RW--D */
@ -199,3 +200,56 @@ kbifGetVFSparseMmapRegions_TU102
*pNumAreas = idx;
return NV_OK;
}
/*! @brief Fetch VF details such as the number of VFs, first VF offset, etc.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelBif Kernel BIF object pointer
*/
void
kbifCacheVFInfo_TU102
(
OBJGPU *pGpu,
KernelBif *pKernelBif
)
{
NV_STATUS status = NV_OK;
NvU32 regVal = 0;
NvU32 saveLo = 0;
NvU32 saveHi = 0;
// Get total VF count
GPU_BUS_CFG_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR3, &regVal);
pGpu->sriovState.totalVFs = GPU_DRF_VAL(_XVE, _SRIOV_CAP_HDR3,
_TOTAL_VFS, regVal);
// Get first VF offset
GPU_BUS_CFG_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR5, &regVal);
pGpu->sriovState.firstVFOffset = GPU_DRF_VAL(_XVE, _SRIOV_CAP_HDR5,
_FIRST_VF_OFFSET, regVal);
// Get VF BAR0 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR9, &saveLo);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[0] = saveLo & 0xFFFFFFF0;
// Get VF BAR1 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR10, &saveLo);
NV_ASSERT(status == NV_OK);
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR11_VF_BAR1_HI, &saveHi);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[1] = (((NvU64)saveHi) << 32) + (saveLo & 0xFFFFFFF0);
// Get VF BAR2 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR12, &saveLo);
NV_ASSERT(status == NV_OK);
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR13_VF_BAR2_HI, &saveHi);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[2] = (((NvU64)saveHi) << 32) + (saveLo & 0xFFFFFFF0);
// Get if VF BARs are 64 bit addressable
regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_SRIOV_CAP_HDR10);
pGpu->sriovState.b64bitVFBar1 = IS_BAR_64(regVal);
regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_SRIOV_CAP_HDR12);
pGpu->sriovState.b64bitVFBar2 = IS_BAR_64(regVal);
}

View File

@ -76,6 +76,9 @@ kbifConstructEngine_IMPL
// Cache MNOC interface support
kbifIsMnocSupported_HAL(pGpu, pKernelBif);
// Cache VF info
kbifCacheVFInfo_HAL(pGpu, pKernelBif);
// Used to track when the link has gone into Recovery, which can cause CEs.
pKernelBif->EnteredRecoverySinceErrorsLastChecked = NV_FALSE;

View File

@ -21,6 +21,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#define NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#include "core/prelude.h"
#include "rmconfig.h"
#include "kernel/gpu/conf_compute/ccsl.h"
@ -34,33 +36,8 @@
#include <hal/library/cryptlib.h>
#include "cc_drv.h"
struct ccslContext_t
{
NvHandle hClient;
NvHandle hChannel;
enum {CSL_MSG_CTR_32, CSL_MSG_CTR_64} msgCounterSize;
NvU8 keyIn[CC_AES_256_GCM_KEY_SIZE_BYTES];
union
{
struct
{
NvU8 ivIn[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskIn[CC_AES_256_GCM_IV_SIZE_BYTES];
};
NvU8 nonce[CC_HMAC_NONCE_SIZE_BYTES];
};
NvU8 keyOut[CC_AES_256_GCM_KEY_SIZE_BYTES];
NvU8 ivOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU64 keyHandleIn;
NvU64 keyHandleOut;
};
static void ccslSplit32(NvU8 *dst, NvU32 num)
static void
ccslSplit32(NvU8 *dst, NvU32 num)
{
dst[3] = (NvU8) (num >> 24);
dst[2] = (NvU8) (num >> 16);
@ -68,7 +45,8 @@ static void ccslSplit32(NvU8 *dst, NvU32 num)
dst[0] = (NvU8) (num);
}
static void ccslSplit64(NvU8 *dst, NvU64 num)
static void
ccslSplit64(NvU8 *dst, NvU64 num)
{
dst[7] = (NvU8) (num >> 56);
dst[6] = (NvU8) (num >> 48);
@ -80,41 +58,8 @@ static void ccslSplit64(NvU8 *dst, NvU64 num)
dst[0] = (NvU8) (num);
}
static NV_STATUS incrementCounter(pCcslContext pCtx, NvU8 *ctr)
{
NvU32 msgCounterLo = NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
if (msgCounterLo == NV_U32_MAX)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
++msgCounterLo;
ccslSplit32(ctr, msgCounterLo);
break;
case CSL_MSG_CTR_64:
{
NvU32 msgCounterhi = NvU32_BUILD(ctr[7], ctr[6], ctr[5], ctr[4]);
NvU64 msgCounter = ((NvU64) msgCounterhi << 32) | msgCounterLo;
if (msgCounter == NV_U64_MAX)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
++msgCounter;
ccslSplit64(ctr, msgCounter);
break;
}
}
return NV_OK;
}
static void writeKmbToContext
static void
writeKmbToContext
(
pCcslContext pCtx,
CC_KMB *kmb
@ -159,7 +104,52 @@ static void writeKmbToContext
}
NV_STATUS
ccslContextInitViaChannel
ccslIncrementCounter_IMPL
(
pCcslContext pCtx,
NvU8 *ctr,
NvU64 increment
)
{
NvU32 msgCounterLo = NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
if (increment > NV_U32_MAX)
{
return NV_ERR_INVALID_ARGUMENT;
}
if (msgCounterLo > (NV_U32_MAX - increment))
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
msgCounterLo += increment;
ccslSplit32(ctr, msgCounterLo);
break;
case CSL_MSG_CTR_64:
{
NvU32 msgCounterHi = NvU32_BUILD(ctr[7], ctr[6], ctr[5], ctr[4]);
NvU64 msgCounter = ((NvU64) msgCounterHi << 32) | msgCounterLo;
if (msgCounterLo > (NV_U64_MAX - increment))
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
msgCounter += increment;
ccslSplit64(ctr, msgCounter);
break;
}
}
return NV_OK;
}
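To make the byte order in `ccslIncrementCounter_IMPL` concrete: the message counter sits little-endian in the leading IV bytes, so `NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0])` reassembles the 32-bit value before the overflow check. The standalone snippet below (plain C, purely illustrative values) reproduces that check for the `CSL_MSG_CTR_32` case.

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Counter bytes as they sit at the front of the IV (little-endian). */
    uint8_t  ctr[4]  = { 0xFE, 0xFF, 0xFF, 0xFF };              /* 0xFFFFFFFE */
    uint32_t counter = ((uint32_t)ctr[3] << 24) | ((uint32_t)ctr[2] << 16) |
                       ((uint32_t)ctr[1] << 8)  |  (uint32_t)ctr[0];
    uint64_t increment;

    assert(counter == 0xFFFFFFFEu);

    /* Same test as "msgCounterLo > (NV_U32_MAX - increment)". */
    increment = 1;   /* 0xFFFFFFFE + 1 == NV_U32_MAX: still allowed           */
    assert(!(counter > UINT32_MAX - increment));

    increment = 2;   /* would wrap past NV_U32_MAX: the call must fail        */
    assert(counter > UINT32_MAX - increment);
    return 0;
}
```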
NV_STATUS
ccslContextInitViaChannel_IMPL
(
pCcslContext *ppCtx,
NvHandle hClient,
@ -228,11 +218,19 @@ ccslContextInitViaChannel
writeKmbToContext(pCtx, kmb);
}
nvDbgDumpBufferBytes(kmb->encryptBundle.iv, sizeof(kmb->encryptBundle.iv));
nvDbgDumpBufferBytes(kmb->encryptBundle.ivMask, sizeof(kmb->encryptBundle.ivMask));
nvDbgDumpBufferBytes(kmb->encryptBundle.key, sizeof(kmb->encryptBundle.key));
nvDbgDumpBufferBytes(kmb->decryptBundle.iv, sizeof(kmb->decryptBundle.iv));
nvDbgDumpBufferBytes(kmb->decryptBundle.ivMask, sizeof(kmb->decryptBundle.ivMask));
nvDbgDumpBufferBytes(kmb->decryptBundle.key, sizeof(kmb->decryptBundle.key));
return NV_OK;
}
NV_STATUS
ccslContextInitViaKeyId
ccslContextInitViaKeyId_KERNEL
(
ConfidentialCompute *pConfCompute,
pCcslContext *ppCtx,
@ -282,7 +280,7 @@ ccslContextInitViaKeyId
}
void
ccslContextClear
ccslContextClear_IMPL
(
pCcslContext pCtx
)
@ -298,52 +296,7 @@ ccslContextClear
}
NV_STATUS
ccslLogDeviceEncryption
(
pCcslContext pCtx,
NvU8 *decryptIv
)
{
NV_STATUS status;
status = incrementCounter(pCtx, pCtx->ivIn);
if (status != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
portMemCopy(decryptIv, CC_AES_256_GCM_IV_SIZE_BYTES, pCtx->ivIn, CC_AES_256_GCM_IV_SIZE_BYTES);
return NV_OK;
}
NV_STATUS
ccslAcquireEncryptionIv
(
pCcslContext pCtx,
NvU8 *encryptIv
)
{
NV_STATUS status;
status = incrementCounter(pCtx, pCtx->ivOut);
if (status != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
portMemCopy(encryptIv, CC_AES_256_GCM_IV_SIZE_BYTES, pCtx->ivOut, CC_AES_256_GCM_IV_SIZE_BYTES);
// The "freshness" bit is right after the IV.
encryptIv[CC_AES_256_GCM_IV_SIZE_BYTES] = 1;
return NV_OK;
}
NV_STATUS
ccslRotateIv
ccslRotateIv_IMPL
(
pCcslContext pCtx,
NvU8 direction
@ -422,12 +375,14 @@ ccslRotateIv
}
NV_STATUS
ccslEncryptWithIv
ccslEncryptWithIv_IMPL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *encryptIv,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
)
@ -449,10 +404,10 @@ ccslEncryptWithIv
}
if(!libspdm_aead_aes_gcm_encrypt(
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_GENERIC;
}
@ -461,11 +416,13 @@ ccslEncryptWithIv
}
NV_STATUS
ccslEncrypt
ccslEncrypt_KERNEL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
)
@ -473,7 +430,7 @@ ccslEncrypt
NvU8 iv[CC_AES_256_GCM_IV_SIZE_BYTES] = {0};
size_t outputBufferSize = bufferSize;
if (incrementCounter(pCtx, pCtx->ivOut) != NV_OK)
if (ccslIncrementCounter(pCtx, pCtx->ivOut, 1) != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
@ -484,10 +441,10 @@ ccslEncrypt
}
if(!libspdm_aead_aes_gcm_encrypt(
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_GENERIC;
}
@ -496,12 +453,14 @@ ccslEncrypt
}
NV_STATUS
ccslDecrypt
ccslDecrypt_KERNEL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 const *authTagBuffer
)
@ -509,9 +468,14 @@ ccslDecrypt
NvU8 iv[CC_AES_256_GCM_IV_SIZE_BYTES] = {0};
size_t outputBufferSize = bufferSize;
if ((bufferSize == 0) || ((aadBuffer != NULL) && (aadSize == 0)))
{
return NV_ERR_INVALID_ARGUMENT;
}
if (decryptIv == NULL)
{
if (incrementCounter(pCtx, pCtx->ivIn) != NV_OK)
if (ccslIncrementCounter(pCtx, pCtx->ivIn, 1) != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
@ -530,10 +494,10 @@ ccslDecrypt
}
if(!libspdm_aead_aes_gcm_decrypt(
(NvU8 *)pCtx->keyIn, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyIn, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_INVALID_DATA;
}
@ -570,7 +534,7 @@ static NV_STATUS incrementCounter192(NvU8 *ctr)
}
NV_STATUS
ccslSign
ccslSign_IMPL
(
pCcslContext pCtx,
NvU32 bufferSize,
@ -625,11 +589,17 @@ ccslSign
return NV_OK;
}
static NvU64 getMessageCounterAndLimit (pCcslContext pCtx, NvU8 *iv, NvU64 *limit)
static NvU64
getMessageCounterAndLimit
(
pCcslContext pCtx,
NvU8 *iv,
NvU64 *limit
)
{
NvU32 msgCounterLo = NvU32_BUILD(iv[3], iv[2], iv[1], iv[0]);
NvU32 msgCounterHi = NvU32_BUILD(iv[7], iv[6], iv[5], iv[4]);
NvU32 msgCounterHi = NvU32_BUILD(iv[7], iv[6], iv[5], iv[4]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
@ -644,7 +614,7 @@ static NvU64 getMessageCounterAndLimit (pCcslContext pCtx, NvU8 *iv, NvU64 *limi
}
NV_STATUS
ccslQueryMessagePool
ccslQueryMessagePool_IMPL
(
pCcslContext pCtx,
NvU8 direction,
@ -670,3 +640,52 @@ ccslQueryMessagePool
return NV_OK;
}
NV_STATUS
ccslIncrementIv_IMPL
(
pCcslContext pCtx,
NvU8 direction,
NvU64 increment,
NvU8 *iv
)
{
NV_STATUS status;
void *ivPtr;
switch (direction)
{
case CCSL_DIR_HOST_TO_DEVICE:
ivPtr = pCtx->ivOut;
break;
case CCSL_DIR_DEVICE_TO_HOST:
ivPtr = pCtx->ivIn;
break;
default:
return NV_ERR_INVALID_ARGUMENT;
}
status = ccslIncrementCounter(pCtx, ivPtr, increment);
if (status != NV_OK)
{
return status;
}
if (iv != NULL) {
portMemCopy(iv, CC_AES_256_GCM_IV_SIZE_BYTES, ivPtr, CC_AES_256_GCM_IV_SIZE_BYTES);
if (direction == CCSL_DIR_HOST_TO_DEVICE)
{
// The "freshness" bit is right after the IV.
iv[CC_AES_256_GCM_IV_SIZE_BYTES] = 1;
}
else
{
// Decrypt IV cannot be used for encryption.
iv[CC_AES_256_GCM_IV_SIZE_BYTES] = 0;
}
}
return NV_OK;
}
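A hypothetical caller-side sketch for the new increment entry point: advancing the encrypt-side counter for a batch of messages up front so they can later be encrypted out of order with `ccslEncryptWithIv`. The un-suffixed wrapper name `ccslIncrementIv` and the batching use case are assumptions (the NVOC-generated header is not shown in this diff); only the `_IMPL` signature, the `CCSL_DIR_HOST_TO_DEVICE` direction, and the extra "freshness" byte written past `CC_AES_256_GCM_IV_SIZE_BYTES` are taken from the code above, with those symbols assumed visible via the same headers ccsl.c includes.

```c
#include "kernel/gpu/conf_compute/ccsl.h"

/*
 * Hypothetical helper, not driver code: advance the host-to-device counter by
 * 'count' in one call and copy out the resulting IV. The buffer must have
 * room for the IV plus the trailing "freshness" byte the call sets.
 */
static NV_STATUS
reserveEncryptIvs
(
    pCcslContext pCtx,
    NvU64        count,
    NvU8         lastIv[CC_AES_256_GCM_IV_SIZE_BYTES + 1]
)
{
    return ccslIncrementIv(pCtx, CCSL_DIR_HOST_TO_DEVICE, count, lastIv);
}
```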

View File

@ -203,6 +203,15 @@ kchannelCreateUserdMemDesc_GV100
AT_GPU,
userdOffset);
// Adjust for the DMA window start address, if any
if (memdescGetAddressSpace(pUserdMemDescForSubDev) == ADDR_SYSMEM)
{
RmPhysAddr dmaWindowStart = gpuGetDmaStartAddress(pGpu);
NV_ASSERT_OR_RETURN(userdAddr > dmaWindowStart, NV_ERR_INVALID_ADDRESS);
userdAddr -= dmaWindowStart;
}
userdAddrLo = NvU64_LO32(userdAddr) >> userdShift;
userdAddrHi = NvU64_HI32(userdAddr);

View File

@ -65,26 +65,18 @@ gpuInitSriov_FWCLIENT
GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);
NvU32 totalPcieFns = 0;
pGpu->sriovState.totalVFs = pGSCI->sriovCaps.totalVFs;
pGpu->sriovState.firstVFOffset = pGSCI->sriovCaps.firstVfOffset;
pGpu->sriovState.firstVFBarAddress[0] = pGSCI->sriovCaps.FirstVFBar0Address;
pGpu->sriovState.firstVFBarAddress[1] = pGSCI->sriovCaps.FirstVFBar1Address;
pGpu->sriovState.firstVFBarAddress[2] = pGSCI->sriovCaps.FirstVFBar2Address;
pGpu->sriovState.vfBarSize[0] = pGSCI->sriovCaps.bar0Size;
pGpu->sriovState.vfBarSize[1] = pGSCI->sriovCaps.bar1Size;
pGpu->sriovState.vfBarSize[2] = pGSCI->sriovCaps.bar2Size;
pGpu->sriovState.b64bitVFBar0 = pGSCI->sriovCaps.b64bitBar0;
pGpu->sriovState.b64bitVFBar1 = pGSCI->sriovCaps.b64bitBar1;
pGpu->sriovState.b64bitVFBar2 = pGSCI->sriovCaps.b64bitBar2;
pGpu->sriovState.vfBarSize[0] = pGSCI->sriovCaps.bar0Size;
pGpu->sriovState.vfBarSize[1] = pGSCI->sriovCaps.bar1Size;
pGpu->sriovState.vfBarSize[2] = pGSCI->sriovCaps.bar2Size;
pGpu->sriovState.maxGfid = pGSCI->sriovMaxGfid;
pGpu->sriovState.maxGfid = pGSCI->sriovMaxGfid;
// note: pGpu->sriovState.virtualRegPhysOffset is initialized separately
// owned by physical RM, so leave uninitialized
pGpu->sriovState.pP2PInfo = NULL;
pGpu->sriovState.bP2PAllocated = NV_FALSE;
pGpu->sriovState.maxP2pGfid = 0;
pGpu->sriovState.pP2PInfo = NULL;
pGpu->sriovState.bP2PAllocated = NV_FALSE;
pGpu->sriovState.maxP2pGfid = 0;
// Include Physical function that occupies GFID 0
totalPcieFns = pGpu->sriovState.totalVFs + 1;

View File

@ -178,6 +178,13 @@ gpuInitRegistryOverrides_KERNEL
DRF_NUM(_REG_STR, _RM_GPU_FABRIC_PROBE, _OVERRIDE, 1);
}
pGpu->bBf3WarBug4040336Enabled = NV_FALSE;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3,
&data32) == NV_OK)
{
pGpu->bBf3WarBug4040336Enabled = (data32 == NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_ENABLE);
}
return NV_OK;
}

View File

@ -2343,24 +2343,27 @@ kgrctxUnmapAssociatedCtxBuffers_IMPL
// channels could be using these mappings, and we must wait for both
// channels to be detached before we remove them.
//
if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->pChanList != NULL)
{
RS_ORDERED_ITERATOR it;
RsResourceRef *pScopeRef = RES_GET_REF(pKernelChannel);
CHANNEL_NODE *pChanNode;
CHANNEL_LIST *pChanList;
// Iterate over all channels in this TSG and check for duplicate VAS
if (!pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
pScopeRef = RES_GET_REF(pKernelChannel->pKernelChannelGroupApi);
pChanList = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->pChanList;
it = kchannelGetIter(RES_GET_CLIENT(pKernelChannel), pScopeRef);
while (clientRefOrderedIterNext(it.pClient, &it))
for (pChanNode = pChanList->pHead; pChanNode; pChanNode = pChanNode->pNext)
{
KernelChannel *pLoopKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);
NV_ASSERT_OR_RETURN_VOID(pLoopKernelChannel != NULL);
if (pLoopKernelChannel == pKernelChannel)
// Skip the channel we are looking to unmap
if (kchannelGetDebugTag(pKernelChannel) == kchannelGetDebugTag(pChanNode->pKernelChannel))
continue;
NV_CHECK_OR_RETURN_VOID(LEVEL_SILENT, pLoopKernelChannel->pVAS != pKernelChannel->pVAS);
if (pKernelChannel->pVAS == pChanNode->pKernelChannel->pVAS)
{
NV_PRINTF(LEVEL_ERROR, "TSG %d Channel %d shares a pVAS with channel %d\n",
pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->grpID,
kchannelGetDebugTag(pKernelChannel),
kchannelGetDebugTag(pChanNode->pKernelChannel));
return;
}
}
}

View File

@ -310,32 +310,6 @@ kgspExecuteSequencerCommand_GA102
switch (opCode)
{
case GSP_SEQ_BUF_OPCODE_CORE_RESET:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
// Reset falcon
kflcnEnable_HAL(pGpu, pKernelFalcon, NV_FALSE);
kflcnEnable_HAL(pGpu, pKernelFalcon, NV_TRUE);
kflcnDisableCtxReq_HAL(pGpu, pKernelFalcon);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_START:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnStartCpu_HAL(pGpu, pKernelFalcon);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
// Wait for the bootloader to complete execution.
status = kflcnWaitForHalt_HAL(pGpu, pKernelFalcon, GPU_TIMEOUT_DEFAULT, 0);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_RESUME:
{
RM_RISCV_UCODE_DESC *pRiscvDesc = pKernelGsp->pGspRmBootUcodeDesc;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -398,7 +398,7 @@ s_executeFwsec_TU102
{
NV_PRINTF(LEVEL_ERROR, "failed to prepare interface data for FWSEC cmd 0x%x: 0x%x\n",
cmd, status);
return status;
goto out;
}
}
else if (pFwsecUcode->bootType == KGSP_FLCN_UCODE_BOOT_WITH_LOADER)
@ -426,7 +426,7 @@ s_executeFwsec_TU102
{
NV_PRINTF(LEVEL_ERROR, "failed to prepare interface data for FWSEC cmd 0x%x: 0x%x\n",
cmd, status);
return status;
goto out;
}
}
else
@ -440,7 +440,7 @@ s_executeFwsec_TU102
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC cmd 0x%x: status 0x%x\n", cmd, status);
return status;
goto out;
}
if (cmd == FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_FRTS)
@ -456,7 +456,8 @@ s_executeFwsec_TU102
if (frtsErrCode != NV_VBIOS_FWSECLIC_FRTS_ERR_CODE_NONE)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for FRTS: FRTS error code 0x%x\n", frtsErrCode);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PFB_PRI_MMU_WPR2_ADDR_HI);
@ -464,7 +465,8 @@ s_executeFwsec_TU102
if (wpr2HiVal == 0)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for FRTS: no initialized WPR2 found\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PFB_PRI_MMU_WPR2_ADDR_LO);
@ -475,7 +477,8 @@ s_executeFwsec_TU102
NV_PRINTF(LEVEL_ERROR,
"failed to execute FWSEC for FRTS: WPR2 initialized at an unexpected location: 0x%08x (expected 0x%08x)\n",
wpr2LoVal, expectedLoVal);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
}
else // i.e. FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_SB
@ -487,14 +490,16 @@ s_executeFwsec_TU102
_READ_PROTECTION_LEVEL0, _ENABLE))
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: GFW PLM not lowered\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
if (!GPU_FLD_TEST_DRF_DEF(pGpu, _PGC6, _AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT,
_PROGRESS, _COMPLETED))
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: GFW progress not completed\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PBUS_VBIOS_SCRATCH(NV_VBIOS_FWSECLIC_SCRATCH_INDEX_15));
@ -502,10 +507,17 @@ s_executeFwsec_TU102
if (sbErrCode != NV_VBIOS_FWSECLIC_SB_ERR_CODE_NONE)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: SB error code 0x%x\n", sbErrCode);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
}
out:
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "(note: VBIOS version %s)\n", pKernelGsp->vbiosVersionStr);
}
return status;
}
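The hunk above converts each early `return` into `goto out` so that a single exit path can append the VBIOS version to every failure message. Reduced to its shape, with hypothetical sub-steps `stepA`/`stepB` and a caller-supplied version string (not driver code), the pattern looks like this:

```c
/* Hypothetical sub-steps standing in for the real FWSEC command stages. */
static NV_STATUS stepA(void);
static NV_STATUS stepB(void);

static NV_STATUS
doStepsWithVbiosContext(const char *vbiosVersionStr)
{
    NV_STATUS status;

    status = stepA();
    if (status != NV_OK)
        goto out;

    status = stepB();
    if (status != NV_OK)
        goto out;

out:
    /* Single exit: every failure path gets the same diagnostic context. */
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "(note: VBIOS version %s)\n", vbiosVersionStr);
    }
    return status;
}
```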

View File

@ -949,11 +949,12 @@ kgspWaitForGfwBootOk_TU102
}
// The wait failed if we reach here (as above loop returns upon success).
NV_PRINTF(LEVEL_ERROR, "failed to wait for GFW_BOOT: 0x%x (progress 0x%x)\n",
NV_PRINTF(LEVEL_ERROR, "failed to wait for GFW_BOOT: 0x%x (progress 0x%x, VBIOS version %s)\n",
status, GPU_REG_RD_DRF(pGpu,
_PGC6,
_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT,
_PROGRESS));
_PROGRESS),
pKernelGsp->vbiosVersionStr);
NV_PRINTF(LEVEL_ERROR, "(the GPU may be in a bad state and may need to be reset)\n");
return status;

View File

@ -2147,6 +2147,26 @@ done:
return nvStatus;
}
/*!
* Convert VBIOS version containing Version and OemVersion packed together to
* a string representation.
*
* Example:
* for Version 0x05400001, OemVersion 0x12
* input argument vbiosVersionCombined 0x0540000112
* output str "5.40.00.01.12"
*/
static void
_kgspVbiosVersionToStr(NvU64 vbiosVersionCombined, char *pVbiosVersionStr, NvU32 size)
{
nvDbgSnprintf(pVbiosVersionStr, size, "%2X.%02X.%02X.%02X.%02X",
(vbiosVersionCombined >> 32) & 0xff,
(vbiosVersionCombined >> 24) & 0xff,
(vbiosVersionCombined >> 16) & 0xff,
(vbiosVersionCombined >> 8) & 0xff,
(vbiosVersionCombined) & 0xff);
}
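As a concrete check of the field layout: the BIT BIOSDATA parsing later in this diff packs the version as `(Version << 8) | OemVersion`, and `_kgspVbiosVersionToStr` above unpacks the same five bytes for printing. The standalone snippet below (plain C and printf, purely illustrative) walks the example from the comment through both steps.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t version    = 0x05400001;  /* BIOS binary version */
    uint8_t  oemVersion = 0x12;        /* OEM version byte    */

    /* Same packing as the BIT BIOSDATA token handling in this change. */
    uint64_t combined = ((uint64_t)version << 8) | oemVersion;  /* 0x0540000112 */

    /* Same field extraction as _kgspVbiosVersionToStr. */
    printf("%2X.%02X.%02X.%02X.%02X\n",
           (unsigned)((combined >> 32) & 0xff),
           (unsigned)((combined >> 24) & 0xff),
           (unsigned)((combined >> 16) & 0xff),
           (unsigned)((combined >>  8) & 0xff),
           (unsigned)( combined        & 0xff));   /* prints " 5.40.00.01.12" */
    return 0;
}
```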
/*!
* Initialize GSP-RM
*
@ -2200,21 +2220,34 @@ kgspInitRm_IMPL
{
KernelGspVbiosImg *pVbiosImg = NULL;
// Start VBIOS version string as "unknown"
portStringCopy(pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr), "unknown", sizeof("unknown"));
// Try and extract a VBIOS image.
status = kgspExtractVbiosFromRom_HAL(pGpu, pKernelGsp, &pVbiosImg);
if (status == NV_OK)
{
NvU64 vbiosVersionCombined = 0;
// Got a VBIOS image, now parse it for FWSEC.
status = kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg,
&pKernelGsp->pFwsecUcode);
&pKernelGsp->pFwsecUcode, &vbiosVersionCombined);
kgspFreeVbiosImg(pVbiosImg);
if (vbiosVersionCombined > 0)
{
_kgspVbiosVersionToStr(vbiosVersionCombined, pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr));
}
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode from VBIOS image: 0x%x\n",
status);
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode from VBIOS image (VBIOS version %s): 0x%x\n",
pKernelGsp->vbiosVersionStr, status);
goto done;
}
NV_PRINTF(LEVEL_INFO, "parsed VBIOS version %s\n", pKernelGsp->vbiosVersionStr);
}
else if (status == NV_ERR_NOT_SUPPORTED)
{
@ -2291,7 +2324,22 @@ kgspInitRm_IMPL
goto done;
}
status = kgspCalculateFbLayout(pGpu, pKernelGsp, pGspFw);
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, _kgspInitLibosLogDecoder(pGpu, pKernelGsp, pGspFw), done);
// Wait for GFW_BOOT OK status
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
// Fail early if WPR2 is up
if (kgspIsWpr2Up_HAL(pGpu, pKernelGsp))
{
NV_PRINTF(LEVEL_ERROR, "unexpected WPR2 already up, cannot proceed with booting gsp\n");
NV_PRINTF(LEVEL_ERROR, "(the GPU is likely in a bad state and may need to be reset)\n");
status = NV_ERR_INVALID_STATE;
goto done;
}
// Calculate FB layout (requires knowing FB size which depends on GFW_BOOT)
status = kgspCalculateFbLayout_HAL(pGpu, pKernelGsp, pGspFw);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Error calculating FB layout\n");
@ -2326,20 +2374,6 @@ kgspInitRm_IMPL
}
}
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, _kgspInitLibosLogDecoder(pGpu, pKernelGsp, pGspFw), done);
// Wait for GFW_BOOT OK status
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
// Fail early if WPR2 is up
if (kgspIsWpr2Up_HAL(pGpu, pKernelGsp))
{
NV_PRINTF(LEVEL_ERROR, "unexpected WPR2 already up, cannot proceed with booting gsp\n");
NV_PRINTF(LEVEL_ERROR, "(the GPU is likely in a bad state and may need to be reset)\n");
status = NV_ERR_INVALID_STATE;
goto done;
}
// bring up ucode with RM offload task
status = kgspBootstrapRiscvOSEarly_HAL(pGpu, pKernelGsp, pGspFw);
if (status != NV_OK)
@ -2447,6 +2481,19 @@ kgspUnloadRm_IMPL
status = kgspExecuteBooterUnloadIfNeeded_HAL(pGpu, pKernelGsp, 0);
}
//
// To fix a boot issue after GPU reset on ESXi configs:
// We still do not have a root cause, but it looks like a sanity check fails during boot after the reset.
// As a temporary WAR, add a 250 ms delay after GSP-RM unload is done.
// Limit this to [VGPU-GSP] supported configs, and only when we are in the GPU reset path.
//
if (API_GPU_IN_RESET_SANITY_CHECK(pGpu) &&
gpuIsSriovEnabled(pGpu) &&
IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
{
osDelay(250);
}
if (rpcStatus != NV_OK)
{
return rpcStatus;
@ -2469,6 +2516,9 @@ kgspDestruct_IMPL
if (!IS_GSP_CLIENT(pGpu))
return;
// set VBIOS version string back to "unknown"
portStringCopy(pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr), "unknown", sizeof("unknown"));
kgspFreeFlcnUcode(pKernelGsp->pFwsecUcode);
pKernelGsp->pFwsecUcode = NULL;
@ -3372,6 +3422,31 @@ kgspExecuteSequencerBuffer_IMPL
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_RESET:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnReset_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
kflcnDisableCtxReq_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_START:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnStartCpu_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
NV_ASSERT_OK_OR_RETURN(kflcnWaitForHalt_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon), GPU_TIMEOUT_DEFAULT, 0));
break;
}
default:
//
// Route this command to the arch-specific handler.

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -67,6 +67,18 @@ struct BIT_TOKEN_V1_00
#define BIT_TOKEN_V1_00_FMT "2b2w"
typedef struct BIT_TOKEN_V1_00 BIT_TOKEN_V1_00;
#define BIT_TOKEN_BIOSDATA 0x42
// structure holding only the version info from BIT_DATA_BIOSDATA_V1 and BIT_DATA_BIOSDATA_V2
typedef struct
{
bios_U032 Version; // BIOS Binary Version Ex. 5.40.00.01.12 = 0x05400001
bios_U008 OemVersion; // OEM Version Number Ex. 5.40.00.01.12 = 0x12
} BIT_DATA_BIOSDATA_BINVER;
#define BIT_DATA_BIOSDATA_BINVER_FMT "1d1b"
#define BIT_DATA_BIOSDATA_BINVER_SIZE_5 5
#define BIT_TOKEN_FALCON_DATA 0x70
typedef struct
@ -441,6 +453,7 @@ s_vbiosFindBitHeader
* @param[in] bitAddr Offset of BIT header within VBIOS image
* @param[in] bUseDebugFwsec Whether to look for debug or prod FWSEC
* @param[out] pFwsecUcodeDescFromBit Resulting ucode desc
* @param[out] pVbiosVersionCombined (optional) output VBIOS version
*/
static NV_STATUS
s_vbiosParseFwsecUcodeDescFromBit
@ -448,7 +461,8 @@ s_vbiosParseFwsecUcodeDescFromBit
const KernelGspVbiosImg * const pVbiosImg,
const NvU32 bitAddr,
const NvBool bUseDebugFwsec,
FlcnUcodeDescFromBit *pFwsecUcodeDescFromBit // out
FlcnUcodeDescFromBit *pFwsecUcodeDescFromBit, // out
NvU64 *pVbiosVersionCombined // out
)
{
@ -491,6 +505,26 @@ s_vbiosParseFwsecUcodeDescFromBit
continue;
}
// catch BIOSDATA token (for capturing VBIOS version)
if (pVbiosVersionCombined != NULL &&
bitToken.TokenId == BIT_TOKEN_BIOSDATA &&
((bitToken.DataVersion == 1) || (bitToken.DataVersion == 2)) &&
bitToken.DataSize > BIT_DATA_BIOSDATA_BINVER_SIZE_5)
{
BIT_DATA_BIOSDATA_BINVER binver;
status = s_vbiosReadStructure(pVbiosImg, &binver,
bitToken.DataPtr, BIT_DATA_BIOSDATA_BINVER_FMT);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,
"failed to read BIOSDATA (BIT token %u), skipping: 0x%x\n",
tokIdx, status);
continue;
}
*pVbiosVersionCombined = (((NvU64) binver.Version) << 8) | ((NvU32) binver.OemVersion);
}
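For illustration, the sketch below applies the packing above to the sample values from the BINVER struct comments (Version 0x05400001, OemVersion 0x12), yielding 0x0540000112; the dotted rendering is an assumption for readability, not code taken from this driver.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Sample values from the BINVER struct comments: 5.40.00.01.12 */
    uint64_t version    = 0x05400001;                   /* BIOS binary version */
    uint64_t oemVersion = 0x12;                         /* OEM version byte    */
    uint64_t combined   = (version << 8) | oemVersion;  /* 0x0540000112        */

    /* Hypothetical dotted rendering; the driver's own formatting may differ. */
    printf("0x%010llx -> %02llx.%02llx.%02llx.%02llx.%02llx\n",
           (unsigned long long)combined,
           (unsigned long long)((combined >> 32) & 0xFF),
           (unsigned long long)((combined >> 24) & 0xFF),
           (unsigned long long)((combined >> 16) & 0xFF),
           (unsigned long long)((combined >>  8) & 0xFF),
           (unsigned long long)(combined & 0xFF));
    return 0;
}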
// skip tokens that are not for falcon ucode data v2
if (bitToken.TokenId != BIT_TOKEN_FALCON_DATA ||
bitToken.DataVersion != 2 ||
@ -1037,10 +1071,11 @@ s_vbiosNewFlcnUcodeFromDesc
* The resulting KernelGspFlcnUcode should be freed with kgspFreeFlcnUcode
* after use.
*
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelGsp KernelGsp pointer
* @param[in] pVbiosImg VBIOS image
* @param[out] ppFwsecUcode Pointer to resulting KernelGspFlcnUcode
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelGsp KernelGsp pointer
* @param[in] pVbiosImg VBIOS image
* @param[out] ppFwsecUcode Pointer to resulting KernelGspFlcnUcode
* @param[out] pVbiosVersionCombined (optional) pointer to output VBIOS version
*/
NV_STATUS
kgspParseFwsecUcodeFromVbiosImg_IMPL
@ -1048,7 +1083,8 @@ kgspParseFwsecUcodeFromVbiosImg_IMPL
OBJGPU *pGpu,
KernelGsp *pKernelGsp,
const KernelGspVbiosImg * const pVbiosImg,
KernelGspFlcnUcode **ppFwsecUcode // out
KernelGspFlcnUcode **ppFwsecUcode, // out
NvU64 *pVbiosVersionCombined // out
)
{
NV_STATUS status;
@ -1072,7 +1108,8 @@ kgspParseFwsecUcodeFromVbiosImg_IMPL
}
bUseDebugFwsec = kgspIsDebugModeEnabled_HAL(pGpu, pKernelGsp);
status = s_vbiosParseFwsecUcodeDescFromBit(pVbiosImg, bitAddr, bUseDebugFwsec, &fwsecUcodeDescFromBit);
status = s_vbiosParseFwsecUcodeDescFromBit(pVbiosImg, bitAddr, bUseDebugFwsec,
&fwsecUcodeDescFromBit, pVbiosVersionCombined);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode desc from VBIOS image: 0x%x\n", status);

View File

@ -70,7 +70,7 @@ static NV_STATUS _memUtilsAllocateUserD(OBJGPU *pGpu, MemoryManager *pMemoryMana
NvHandle hDeviceId, OBJCHANNEL *pChannel);
static NV_STATUS _memUtilsMapUserd_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager,
OBJCHANNEL *pChannel, NvHandle hClientId, NvHandle hDeviceId,
NvHandle hChannelId);
NvHandle hChannelId, NvBool bUseRmApiForBar1);
static NV_STATUS _memUtilsAllocateReductionSema(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel);
static NvU32 _ceChannelScheduleBatchWork_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
RmPhysAddr src, NV_ADDRESS_SPACE srcAddressSpace, NvU32 srcCpuCacheAttrib,
@ -378,27 +378,28 @@ memmgrMemUtilsChannelInitialize_GM107
OBJCHANNEL *pChannel
)
{
NV_STATUS rmStatus;
NV_STATUS lockStatus;
RsClient *pRsClient;
NvHandle hClient;
NvHandle hDevice; // device handle
NvHandle hPhysMem; // memory handle
NvU64 size;
NvHandle hChannel; // channel handle
NvHandle hErrNotifierVirt;
NvHandle hErrNotifierPhys;
NvHandle hPushBuffer;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
Heap *pHeap = GPU_GET_HEAP(pGpu);
NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
NvU8 *pErrNotifierCpuVA = NULL;
NV_ADDRESS_SPACE userdAddrSpace;
NV_ADDRESS_SPACE pushBuffAddrSpace;
NV_ADDRESS_SPACE gpFifoAddrSpace;
OBJSYS *pSys = SYS_GET_INSTANCE();
OBJCL *pCl = SYS_GET_CL(pSys);
NvU32 cacheSnoopFlag = 0 ;
NV_STATUS rmStatus;
NV_STATUS lockStatus;
RsClient *pRsClient;
NvHandle hClient;
NvHandle hDevice;
NvHandle hPhysMem;
NvU64 size;
NvHandle hChannel;
NvHandle hErrNotifierVirt;
NvHandle hErrNotifierPhys;
NvHandle hPushBuffer;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
Heap *pHeap = GPU_GET_HEAP(pGpu);
NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
NvU8 *pErrNotifierCpuVA = NULL;
NV_ADDRESS_SPACE userdAddrSpace;
NV_ADDRESS_SPACE pushBuffAddrSpace;
NV_ADDRESS_SPACE gpFifoAddrSpace;
OBJSYS *pSys = SYS_GET_INSTANCE();
OBJCL *pCl = SYS_GET_CL(pSys);
NvU32 cacheSnoopFlag = 0 ;
NvBool bUseRmApiForBar1 = NV_FALSE;
//
// Heap alloc one chunk of memory to hold all of our alloc parameters to
@ -858,46 +859,64 @@ memmgrMemUtilsChannelInitialize_GM107
rmStatus,
LEVEL_ERROR,
_memUtilsMapUserd_GM107(pGpu, pMemoryManager, pChannel,
hClient, hDevice, hChannel),
hClient, hDevice, hChannel, bUseRmApiForBar1),
exit_free_client);
//
// map cpu pointer
// Map the pushbuffer memory to CPU viewable region
//
NV_CHECK_OK_OR_GOTO(
rmStatus,
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClient,
hDevice,
hPhysMem,
0,
size,
(void **)&pChannel->pbCpuVA,
0),
exit_free_client);
// Set up pushbuffer and semaphore memdesc and memset the buffer
pChannel->pChannelBufferMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hPhysMem);
NV_ASSERT_OR_GOTO(pChannel->pChannelBufferMemdesc != NULL, exit_free_client);
portMemSet(pChannel->pbCpuVA, 0, (NvLength)size);
// Set up notifier memory
pChannel->pErrNotifierMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hErrNotifierPhys);
NV_ASSERT_OR_GOTO(pChannel->pErrNotifierMemdesc != NULL, exit_free_client);
// Map the notifier memory to CPU viewable region
NV_CHECK_OK_OR_GOTO(
rmStatus,
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClient,
hDevice,
hErrNotifierPhys,
0,
pChannel->channelNotifierSize,
(void **)&pErrNotifierCpuVA,
0),
exit_free_client);
if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
{
rmStatus = memmgrMemDescMemSet(pMemoryManager, pChannel->pChannelBufferMemdesc, 0,
(TRANSFER_FLAGS_SHADOW_ALLOC | TRANSFER_FLAGS_SHADOW_INIT_MEM));
NV_ASSERT_OR_GOTO(rmStatus == NV_OK, exit_free_client);
pChannel->pTokenFromNotifier =
(NvNotification *)(pErrNotifierCpuVA +
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
sizeof(NvNotification)));
pChannel->pbCpuVA = NULL;
pChannel->pTokenFromNotifier = NULL;
}
else
{
if (bUseRmApiForBar1)
{
NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClient, hDevice, hPhysMem, 0, size,
(void **)&pChannel->pbCpuVA, 0),
exit_free_client);
NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClient, hDevice, hErrNotifierPhys, 0,
pChannel->channelNotifierSize, (void **)&pErrNotifierCpuVA, 0),
exit_free_client);
}
else
{
//
// Most use cases can migrate to the internal memdescMap path for BAR1,
// which is preferred because the external path will not work with CC.
//
pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_GOTO(pChannel->pbCpuVA != NULL, exit_free_client);
pErrNotifierCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_GOTO(pErrNotifierCpuVA != NULL, exit_free_client);
}
portMemSet(pChannel->pbCpuVA, 0, (NvLength)size);
pChannel->pTokenFromNotifier =
(NvNotification *)(pErrNotifierCpuVA +
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
sizeof(NvNotification)));
}
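For context, a minimal sketch of the mapping lifecycle the internal BAR1 path above relies on: a memmgrMemDescBeginTransfer(..., TRANSFER_FLAGS_USE_BAR1) mapping is presumed to be released with a matching memmgrMemDescEndTransfer call once CPU access is done; that teardown call and its flags are assumptions here, not something shown in this hunk (channelPbSize is a field referenced later in this diff).

/*
 * Hedged sketch of a paired BAR1 transfer mapping, mirroring the
 * Begin call used above.  The End call and its flags are assumed.
 */
static NV_STATUS s_exampleClearChannelBuffer(MemoryManager *pMemoryManager, OBJCHANNEL *pChannel)
{
    NvU8 *pCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
                       pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
    NV_ASSERT_OR_RETURN(pCpuVA != NULL, NV_ERR_GENERIC);

    /* Touch the buffer through the BAR1 mapping. */
    portMemSet(pCpuVA, 0, (NvLength)pChannel->channelPbSize);

    /* Assumed matching teardown for the Begin call above. */
    memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                             TRANSFER_FLAGS_USE_BAR1);
    return NV_OK;
}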
//
// Allocate and map the doorbell region to use in scrub on free
@ -1188,7 +1207,12 @@ memmgrMemUtilsCopyEngineInitialize_GM107
// initialize the channel parameters (should be done by the parent object)
pChannel->channelPutOffset = 0;
MEM_WR32(pChannel->pbCpuVA + pChannel->semaOffset, 0);
if (pChannel->pbCpuVA != NULL)
{
MEM_WR32(pChannel->pbCpuVA + pChannel->semaOffset, 0);
}
return NV_OK;
exit_free:
@ -1300,25 +1324,48 @@ _memUtilsMapUserd_GM107
OBJCHANNEL *pChannel,
NvHandle hClientId,
NvHandle hDeviceId,
NvHandle hChannelId
NvHandle hChannelId,
NvBool bUseRmApiForBar1
)
{
NvU32 userdSize;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
//
// The memTransfer API only works for client-allocated USERD;
// otherwise MapToCpu is called with the channel handle instead.
//
if (pChannel->bClientUserd && !bUseRmApiForBar1)
{
pChannel->pUserdMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, hClientId, pChannel->hUserD);
NV_ASSERT_OR_RETURN(pChannel->pUserdMemdesc != NULL, NV_ERR_GENERIC);
kfifoGetUserdSizeAlign_HAL(GPU_GET_KERNEL_FIFO(pGpu), &userdSize, NULL);
NV_CHECK_OK_OR_RETURN(
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClientId,
hDeviceId,
pChannel->bClientUserd ? pChannel->hUserD : hChannelId,
0,
userdSize,
(void **)&pChannel->pControlGPFifo,
0));
if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
{
//
// GPFIFO access will not be set up in order to facilitate memTransfer APIs
// which will use GSP-DMA/CE with shadow buffers
//
pChannel->pControlGPFifo = NULL;
}
else
{
pChannel->pControlGPFifo =
(void *)memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pUserdMemdesc,
TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
}
}
else
{
NvU32 userdSize = 0;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
kfifoGetUserdSizeAlign_HAL(GPU_GET_KERNEL_FIFO(pGpu), &userdSize, NULL);
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClientId, hDeviceId,
pChannel->bClientUserd ? pChannel->hUserD : hChannelId, 0,
userdSize, (void **)&pChannel->pControlGPFifo, 0));
}
return NV_OK;
}
@ -1522,6 +1569,10 @@ memmgrMemUtilsMemSet_GM107
return NV_ERR_GENERIC;
}
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
if (pChannel->isProgressChecked)
{
// if progress is checked insert the semaphore with freeToken as payload
@ -1579,6 +1630,9 @@ memmgrMemUtilsMemSetBlocking_GM107
}
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
blocksPushed = _ceChannelScheduleWork_GM107(pGpu, pMemoryManager, pChannel,
0, 0, 0, // src parameters
base, ADDR_FBMEM, 0, // dst parameters
@ -1644,6 +1698,9 @@ memmgrMemUtilsMemSetBatched_GM107
{
NvU32 blocksPushed = 0;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
blocksPushed = _ceChannelScheduleBatchWork_GM107(pGpu, pMemoryManager, pChannel,
0, 0, 0, // src parameters
base, ADDR_FBMEM, 0, // dst parameters
@ -1720,6 +1777,9 @@ memmgrMemUtilsMemCopyBatched_GM107
NvU64 size
)
{
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
NvU32 blocksPushed = _ceChannelScheduleBatchWork_GM107(pGpu, pMemoryManager, pChannel,
src, srcAddressSpace, srcCpuCacheAttrib, // src parameters
dst, dstAddressSpace, dstCpuCacheAttrib, // dst parameters
@ -1897,6 +1957,8 @@ _getSpaceInPb(OBJCHANNEL *pChannel)
NvU32 filledSpace;
NvU32 avlblSpace;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
if (pChannel->channelPutOffset >= MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset))
{
filledSpace = (pChannel->channelPutOffset - MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset));
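As a side note on the arithmetic above, here is a self-contained sketch of the pushbuffer space computation using a PUT offset and a GET value read back via the semaphore; the wrapped case and the absence of any reserved-slot adjustment are assumptions, since only the non-wrapped branch is visible in this hunk.

#include <stdio.h>
#include <stdint.h>

/* Generic ring-space arithmetic mirroring _getSpaceInPb above (sketch only). */
static uint32_t spaceInPb(uint32_t pbSize, uint32_t put, uint32_t get)
{
    uint32_t filled = (put >= get) ? (put - get)
                                   : (pbSize - get + put);  /* wrapped case (assumed) */
    return pbSize - filled;
}

int main(void)
{
    /* Example: 4 KB pushbuffer, PUT at 0x300, GET at 0x100 -> 0x200 filled, 0xE00 free. */
    printf("free = 0x%x\n", spaceInPb(0x1000, 0x300, 0x100));
    return 0;
}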
@ -1952,6 +2014,8 @@ _ceChannelScheduleBatchWork_GM107
spaceInPb = pChannel->channelPbSize - pChannel->channelPutOffset;
NV_ASSERT_OR_RETURN(spaceInPb >= pChannel->methodSizePerBlock, 0);
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, 0);
// Support for sending semaphore-release-only work.
if (size > 0)
@ -2046,6 +2110,9 @@ _ceChannelScheduleWork_GM107
NvBool addFinishPayload;
NvU32 blockSize = 0;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, 0);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
spaceInPb = _getSpaceInPb(pChannel);
@ -2224,6 +2291,9 @@ _ceChannelUpdateGpFifo_GM107
KernelChannel *pFifoKernelChannel;
KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
NV_ASSERT_OR_RETURN_VOID(pChannel->pbCpuVA != NULL);
NV_ASSERT_OR_RETURN_VOID(pChannel->pControlGPFifo != NULL);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
GPPut = MEM_RD32(&pChannel->pControlGPFifo->GPPut);
GPGet = MEM_RD32(&pChannel->pControlGPFifo->GPGet);

Some files were not shown because too many files have changed in this diff.