550.40.07

This commit is contained in:
Bernhard Stoeckner 2024-01-24 17:51:53 +01:00
parent bb2dac1f20
commit 91676d6628
No known key found for this signature in database
GPG Key ID: 7D23DC2750FAC2E1
1411 changed files with 261367 additions and 145959 deletions

View File

@ -1,5 +1,13 @@
# Changelog
## Release 550 Entries
### [550.40.07] 2024-01-24
#### Fixed
- Set INSTALL_MOD_DIR only if it's not defined, [#570](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/570) by @keelung-yang
## Release 545 Entries
### [545.29.06] 2023-11-22

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 545.29.06.
version 550.40.07.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
545.29.06 driver release. This can be achieved by installing
550.40.07 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -188,7 +188,7 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.29.06/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.07/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -683,6 +683,7 @@ Subsystem Device ID.
| NVIDIA A800 40GB Active | 20F6 103C 180A |
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
| NVIDIA AX800 | 20FD 10DE 17F8 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
@ -836,6 +837,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 Embedded GPU | 25FA |
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA GeForce RTX 4090 D | 2685 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
@ -844,16 +846,22 @@ Subsystem Device ID.
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
| NVIDIA GeForce RTX 4070 | 2786 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
@ -868,6 +876,7 @@ Subsystem Device ID.
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
| NVIDIA L2 | 27B6 10DE 1933 |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |

View File

@ -70,9 +70,9 @@ $(foreach _module, $(NV_KERNEL_MODULES), \
EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.29.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@ -134,6 +134,16 @@ ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
endif
ifdef VGX_FORCE_VFIO_PCI_CORE
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif
WARNINGS_AS_ERRORS ?=
ifeq ($(WARNINGS_AS_ERRORS),1)
ccflags-y += -Werror
else
ccflags-y += -Wno-error
endif
#
# The conftest.sh script tests various aspects of the target kernel.
@ -160,6 +170,7 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += -Wno-error
NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h
@ -219,106 +230,7 @@ $(obj)/conftest/patches.h: $(NV_CONFTEST_SCRIPT)
@mkdir -p $(obj)/conftest
@$(NV_CONFTEST_CMD) patch_check > $@
# Each of these headers is checked for presence with a test #include; a
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_color_mgmt.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_atomic_state_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_fbdev_generic.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
drm/drm_blend.h \
drm/drm_fourcc.h \
drm/drm_prime.h \
drm/drm_plane.h \
drm/drm_vblank.h \
drm/drm_file.h \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
xen/ioemu.h \
linux/fence.h \
linux/dma-fence.h \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
asm/powernv.h \
linux/atomic.h \
asm/barrier.h \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
sound/hda_codec.h \
linux/dma-buf.h \
linux/time.h \
linux/platform_device.h \
linux/mutex.h \
linux/reset.h \
linux/of_platform.h \
linux/of_device.h \
linux/of_gpio.h \
linux/gpio.h \
linux/gpio/consumer.h \
linux/interconnect.h \
linux/pm_runtime.h \
linux/clk.h \
linux/clk-provider.h \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h
include $(src)/header-presence-tests.mk
# Filename to store the define for the header in $(1); this is only consumed by
# the rule below that concatenates all of these together.

View File

@ -57,12 +57,15 @@ else
-e 's/armv[0-7]\w\+/arm/' \
-e 's/aarch64/arm64/' \
-e 's/ppc64le/powerpc/' \
-e 's/riscv64/riscv/' \
)
endif
NV_KERNEL_MODULES ?= $(wildcard nvidia nvidia-uvm nvidia-vgpu-vfio nvidia-modeset nvidia-drm nvidia-peermem)
NV_KERNEL_MODULES := $(filter-out $(NV_EXCLUDE_KERNEL_MODULES), \
$(NV_KERNEL_MODULES))
INSTALL_MOD_DIR ?= kernel/drivers/video
NV_VERBOSE ?=
SPECTRE_V2_RETPOLINE ?= 0
@ -74,7 +77,7 @@ else
KBUILD_PARAMS += NV_KERNEL_SOURCES=$(KERNEL_SOURCES)
KBUILD_PARAMS += NV_KERNEL_OUTPUT=$(KERNEL_OUTPUT)
KBUILD_PARAMS += NV_KERNEL_MODULES="$(NV_KERNEL_MODULES)"
KBUILD_PARAMS += INSTALL_MOD_DIR=kernel/drivers/video
KBUILD_PARAMS += INSTALL_MOD_DIR="$(INSTALL_MOD_DIR)"
KBUILD_PARAMS += NV_SPECTRE_V2=$(SPECTRE_V2_RETPOLINE)
.PHONY: modules module clean clean_conftest modules_install

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -39,5 +39,6 @@
#define NV_ESC_QUERY_DEVICE_INTR (NV_IOCTL_BASE + 13)
#define NV_ESC_SYS_PARAMS (NV_IOCTL_BASE + 14)
#define NV_ESC_EXPORT_TO_DMABUF_FD (NV_IOCTL_BASE + 17)
#define NV_ESC_WAIT_OPEN_COMPLETE (NV_IOCTL_BASE + 18)
#endif

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -142,4 +142,10 @@ typedef struct nv_ioctl_export_to_dma_buf_fd
NvU32 status;
} nv_ioctl_export_to_dma_buf_fd_t;
typedef struct nv_ioctl_wait_open_complete
{
int rc;
NvU32 adapterStatus;
} nv_ioctl_wait_open_complete_t;
#endif

View File

@ -35,6 +35,7 @@
#include "os-interface.h"
#include "nv-timer.h"
#include "nv-time.h"
#include "nv-chardev-numbers.h"
#define NV_KERNEL_NAME "Linux"
@ -406,37 +407,6 @@ extern int nv_pat_mode;
#define NV_GFP_DMA32 (NV_GFP_KERNEL)
#endif
extern NvBool nvos_is_chipset_io_coherent(void);
#if defined(NVCPU_X86_64)
#define CACHE_FLUSH() asm volatile("wbinvd":::"memory")
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
#elif defined(NVCPU_AARCH64)
static inline void nv_flush_cache_cpu(void *info)
{
if (!nvos_is_chipset_io_coherent())
{
#if defined(NV_FLUSH_CACHE_ALL_PRESENT)
flush_cache_all();
#else
WARN_ONCE(0, "NVRM: kernel does not support flush_cache_all()\n");
#endif
}
}
#define CACHE_FLUSH() nv_flush_cache_cpu(NULL)
#define CACHE_FLUSH_ALL() on_each_cpu(nv_flush_cache_cpu, NULL, 1)
#define WRITE_COMBINE_FLUSH() mb()
#elif defined(NVCPU_PPC64LE)
#define CACHE_FLUSH() asm volatile("sync; \n" \
"isync; \n" ::: "memory")
#define WRITE_COMBINE_FLUSH() CACHE_FLUSH()
#elif defined(NVCPU_RISCV64)
#define CACHE_FLUSH() mb()
#define WRITE_COMBINE_FLUSH() CACHE_FLUSH()
#else
#error "CACHE_FLUSH() and WRITE_COMBINE_FLUSH() need to be defined for this architecture."
#endif
typedef enum
{
NV_MEMORY_TYPE_SYSTEM, /* Memory mapped for ROM, SBIOS and physical RAM. */
@ -1380,7 +1350,19 @@ typedef struct nv_dma_map_s {
i < dm->mapping.discontig.submap_count; \
i++, sm = &dm->mapping.discontig.submaps[i])
/*
* On 4K ARM kernels, use max submap size a multiple of 64K to keep nv-p2p happy.
* Despite 4K OS pages, we still use 64K P2P pages due to dependent modules still using 64K.
* Instead of using (4G-4K), use max submap size as (4G-64K) since the mapped IOVA range
* must be aligned at 64K boundary.
*/
#if defined(CONFIG_ARM64_4K_PAGES)
#define NV_DMA_U32_MAX_4K_PAGES ((NvU32)((NV_U32_MAX >> PAGE_SHIFT) + 1))
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_DMA_U32_MAX_4K_PAGES - 16))
#else
#define NV_DMA_SUBMAP_MAX_PAGES ((NvU32)(NV_U32_MAX >> PAGE_SHIFT))
#endif
#define NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(s) (s * NV_DMA_SUBMAP_MAX_PAGES)
/*
@ -1460,6 +1442,11 @@ typedef struct coherent_link_info_s {
* baremetal OS environment it is System Physical Address(SPA) and in the case
* of virutalized OS environment it is Intermediate Physical Address(IPA) */
NvU64 gpu_mem_pa;
/* Physical address of the reserved portion of the GPU memory, applicable
* only in Grace Hopper self hosted passthrough virtualizatioan platform. */
NvU64 rsvd_mem_pa;
/* Bitmap of NUMA node ids, corresponding to the reserved PXMs,
* available for adding GPU memory to the kernel as system RAM */
DECLARE_BITMAP(free_node_bitmap, MAX_NUMNODES);
@ -1607,6 +1594,26 @@ typedef struct nv_linux_state_s {
struct nv_dma_device dma_dev;
struct nv_dma_device niso_dma_dev;
/*
* Background kthread for handling deferred open operations
* (e.g. from O_NONBLOCK).
*
* Adding to open_q and reading/writing is_accepting_opens
* are protected by nvl->open_q_lock (not nvl->ldata_lock).
* This allows new deferred open operations to be enqueued without
* blocking behind previous ones (which hold nvl->ldata_lock).
*
* Adding to open_q is only safe if is_accepting_opens is true.
* This prevents open operations from racing with device removal.
*
* Stopping open_q is only safe after setting is_accepting_opens to false.
* This ensures that the open_q (and the larger nvl structure) will
* outlive any of the open operations enqueued.
*/
nv_kthread_q_t open_q;
NvBool is_accepting_opens;
struct semaphore open_q_lock;
} nv_linux_state_t;
extern nv_linux_state_t *nv_linux_devices;
@ -1656,7 +1663,7 @@ typedef struct
nvidia_stack_t *sp;
nv_alloc_t *free_list;
void *nvptr;
nv_linux_state_t *nvptr;
nvidia_event_t *event_data_head, *event_data_tail;
NvBool dataless_event_pending;
nv_spinlock_t fp_lock;
@ -1667,6 +1674,12 @@ typedef struct
nv_alloc_mapping_context_t mmap_context;
struct address_space mapping;
nv_kthread_q_item_t open_q_item;
struct completion open_complete;
nv_linux_state_t *deferred_open_nvl;
int open_rc;
NV_STATUS adapter_status;
struct list_head entry;
} nv_linux_file_private_t;
@ -1675,6 +1688,21 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
return container_of(nvfp, nv_linux_file_private_t, nvfp);
}
static inline int nv_wait_open_complete_interruptible(nv_linux_file_private_t *nvlfp)
{
return wait_for_completion_interruptible(&nvlfp->open_complete);
}
static inline void nv_wait_open_complete(nv_linux_file_private_t *nvlfp)
{
wait_for_completion(&nvlfp->open_complete);
}
static inline NvBool nv_is_open_complete(nv_linux_file_private_t *nvlfp)
{
return completion_done(&nvlfp->open_complete);
}
#define NV_SET_FILE_PRIVATE(filep,data) ((filep)->private_data = (data))
#define NV_GET_LINUX_FILE_PRIVATE(filep) ((nv_linux_file_private_t *)(filep)->private_data)
@ -1756,12 +1784,18 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
extern NvU32 NVreg_EnableUserNUMAManagement;
extern NvU32 NVreg_RegisterPCIDriver;
extern NvU32 NVreg_EnableResizableBar;
extern NvU32 NVreg_EnableNonblockingOpen;
extern NvU32 num_probed_nv_devices;
extern NvU32 num_nv_devices;
#define NV_FILE_INODE(file) (file)->f_inode
static inline int nv_is_control_device(struct inode *inode)
{
return (minor((inode)->i_rdev) == NV_MINOR_DEVICE_NUMBER_CONTROL_DEVICE);
}
#if defined(NV_DOM0_KERNEL_PRESENT) || defined(NV_VGPU_KVM_BUILD)
#define NV_VGX_HYPER
#if defined(NV_XEN_IOEMU_INJECT_MSI)
@ -2040,4 +2074,7 @@ typedef enum
#include <linux/clk-provider.h>
#endif
#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol
#endif /* _NV_LINUX_H_ */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -37,6 +37,7 @@
#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_PREEMPT_RT_FULL)
typedef raw_spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_RAW_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) raw_spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) raw_spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) raw_spin_unlock_irq(lock)
@ -47,6 +48,7 @@ typedef raw_spinlock_t nv_spinlock_t;
#define NV_SPIN_UNLOCK_WAIT(lock) raw_spin_unlock_wait(lock)
#else
typedef spinlock_t nv_spinlock_t;
#define NV_DEFINE_SPINLOCK(lock) DEFINE_SPINLOCK(lock)
#define NV_SPIN_LOCK_INIT(lock) spin_lock_init(lock)
#define NV_SPIN_LOCK_IRQ(lock) spin_lock_irq(lock)
#define NV_SPIN_UNLOCK_IRQ(lock) spin_unlock_irq(lock)

View File

@ -44,12 +44,18 @@ typedef int vm_fault_t;
#include <linux/mm.h>
#include <linux/sched.h>
#if defined(NV_PIN_USER_PAGES_PRESENT)
/*
* FreeBSD's pin_user_pages's conftest breaks since pin_user_pages is an inline
* function. Because it simply maps to get_user_pages, we can just replace
* NV_PIN_USER_PAGES with NV_GET_USER_PAGES on FreeBSD
*/
#if defined(NV_PIN_USER_PAGES_PRESENT) && !defined(NV_BSD)
#if defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS)
#define NV_PIN_USER_PAGES pin_user_pages
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages) \
pin_user_pages(start, nr_pages, gup_flags, pages, NULL)
#else
#define NV_PIN_USER_PAGES(start, nr_pages, gup_flags, pages, vmas) \
pin_user_pages(start, nr_pages, gup_flags, pages)
#define NV_PIN_USER_PAGES pin_user_pages
#endif // NV_PIN_USER_PAGES_HAS_ARGS_VMAS
#define NV_UNPIN_USER_PAGE unpin_user_page
#else
@ -80,29 +86,28 @@ typedef int vm_fault_t;
*/
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
get_user_pages(start, nr_pages, flags, pages)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
#define NV_GET_USER_PAGES get_user_pages
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
get_user_pages(start, nr_pages, flags, pages, NULL)
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages) \
get_user_pages(current, current->mm, start, nr_pages, flags, pages, NULL)
#else
static inline long NV_GET_USER_PAGES(unsigned long start,
unsigned long nr_pages,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas)
struct page **pages)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS)
return get_user_pages(start, nr_pages, write, force, pages, vmas);
return get_user_pages(start, nr_pages, write, force, pages, NULL);
#else
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
return get_user_pages(current, current->mm, start, nr_pages, write,
force, pages, vmas);
force, pages, NULL);
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE_VMAS
}
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
@ -124,13 +129,13 @@ typedef int vm_fault_t;
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS)
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_VMAS)
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
pin_user_pages_remote(mm, start, nr_pages, flags, pages, NULL, locked)
#else
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
pin_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK_VMAS
#else
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
@ -166,19 +171,19 @@ typedef int vm_fault_t;
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(mm, start, nr_pages, flags, pages, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL, locked)
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_VMAS)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, NULL)
#else
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE_VMAS
@ -187,14 +192,13 @@ typedef int vm_fault_t;
unsigned long nr_pages,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas,
int *locked)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
pages, vmas);
pages, NULL);
}
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
#else
@ -204,18 +208,17 @@ typedef int vm_fault_t;
unsigned long nr_pages,
unsigned int flags,
struct page **pages,
struct vm_area_struct **vmas,
int *locked)
{
int write = flags & FOLL_WRITE;
int force = flags & FOLL_FORCE;
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, NULL);
}
#else
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, locked) \
get_user_pages(NULL, mm, start, nr_pages, flags, pages, NULL)
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE_VMAS
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -60,6 +60,7 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
#endif /* !defined(NV_VMWARE) */
#if defined(NVCPU_AARCH64)
extern NvBool nvos_is_chipset_io_coherent(void);
/*
* Don't rely on the kernel's definition of pgprot_noncached(), as on 64-bit
* ARM that's not for system memory, but device memory instead. For I/O cache

View File

@ -92,6 +92,24 @@ typedef struct file_operations nv_proc_ops_t;
#endif
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
static ssize_t nv_procfs_read_lock_##name( \
struct file *file, \
char __user *buf, \
size_t size, \
loff_t *ppos \
) \
{ \
int ret; \
ret = nv_down_read_interruptible(&lock); \
if (ret < 0) \
{ \
return ret; \
} \
size = seq_read(file, buf, size, ppos); \
up_read(&lock); \
return size; \
} \
\
static int nv_procfs_open_##name( \
struct inode *inode, \
struct file *filep \
@ -104,11 +122,6 @@ typedef struct file_operations nv_proc_ops_t;
{ \
return ret; \
} \
ret = nv_down_read_interruptible(&lock); \
if (ret < 0) \
{ \
single_release(inode, filep); \
} \
return ret; \
} \
\
@ -117,7 +130,6 @@ typedef struct file_operations nv_proc_ops_t;
struct file *filep \
) \
{ \
up_read(&lock); \
return single_release(inode, filep); \
}
@ -127,46 +139,7 @@ typedef struct file_operations nv_proc_ops_t;
static const nv_proc_ops_t nv_procfs_##name##_fops = { \
NV_PROC_OPS_SET_OWNER() \
.NV_PROC_OPS_OPEN = nv_procfs_open_##name, \
.NV_PROC_OPS_READ = seq_read, \
.NV_PROC_OPS_LSEEK = seq_lseek, \
.NV_PROC_OPS_RELEASE = nv_procfs_release_##name, \
};
#define NV_DEFINE_SINGLE_PROCFS_FILE_READ_WRITE(name, lock, \
write_callback) \
NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
\
static ssize_t nv_procfs_write_##name( \
struct file *file, \
const char __user *buf, \
size_t size, \
loff_t *ppos \
) \
{ \
ssize_t ret; \
struct seq_file *s; \
\
s = file->private_data; \
if (s == NULL) \
{ \
return -EIO; \
} \
\
ret = write_callback(s, buf + *ppos, size - *ppos); \
if (ret == 0) \
{ \
/* avoid infinite loop */ \
ret = -EIO; \
} \
return ret; \
} \
\
static const nv_proc_ops_t nv_procfs_##name##_fops = { \
NV_PROC_OPS_SET_OWNER() \
.NV_PROC_OPS_OPEN = nv_procfs_open_##name, \
.NV_PROC_OPS_READ = seq_read, \
.NV_PROC_OPS_WRITE = nv_procfs_write_##name, \
.NV_PROC_OPS_READ = nv_procfs_read_lock_##name, \
.NV_PROC_OPS_LSEEK = seq_lseek, \
.NV_PROC_OPS_RELEASE = nv_procfs_release_##name, \
};

View File

@ -88,4 +88,7 @@ int nv_linux_add_device_locked(nv_linux_state_t *);
void nv_linux_remove_device_locked(nv_linux_state_t *);
NvBool nv_acpi_power_resource_method_present(struct pci_dev *);
int nv_linux_init_open_q(nv_linux_state_t *);
void nv_linux_stop_open_q(nv_linux_state_t *);
#endif /* _NV_PROTO_H_ */

View File

@ -221,7 +221,6 @@ typedef struct
#define NV_RM_PAGE_MASK (NV_RM_PAGE_SIZE - 1)
#define NV_RM_TO_OS_PAGE_SHIFT (os_page_shift - NV_RM_PAGE_SHIFT)
#define NV_RM_PAGES_PER_OS_PAGE (1U << NV_RM_TO_OS_PAGE_SHIFT)
#define NV_RM_PAGES_TO_OS_PAGES(count) \
((((NvUPtr)(count)) >> NV_RM_TO_OS_PAGE_SHIFT) + \
((((count) & ((1 << NV_RM_TO_OS_PAGE_SHIFT) - 1)) != 0) ? 1 : 0))
@ -467,12 +466,6 @@ typedef struct nv_state_t
NvHandle hDisp;
} rmapi;
/* Bool to check if ISO iommu enabled */
NvBool iso_iommu_present;
/* Bool to check if NISO iommu enabled */
NvBool niso_iommu_present;
/* Bool to check if dma-buf is supported */
NvBool dma_buf_supported;
@ -484,6 +477,22 @@ typedef struct nv_state_t
/* Bool to check if the GPU has a coherent sysmem link */
NvBool coherent;
/*
* NUMA node ID of the CPU to which the GPU is attached.
* Holds NUMA_NO_NODE on platforms that don't support NUMA configuration.
*/
NvS32 cpu_numa_node_id;
struct {
/* Bool to check if ISO iommu enabled */
NvBool iso_iommu_present;
/* Bool to check if NISO iommu enabled */
NvBool niso_iommu_present;
/* Display SMMU Stream IDs */
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;
} nv_state_t;
// These define need to be in sync with defines in system.h
@ -613,10 +622,10 @@ typedef enum
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iso_iommu_present)
((nv)->iommus.iso_iommu_present)
#define NV_SOC_IS_NISO_IOMMU_PRESENT(nv) \
((nv)->niso_iommu_present)
((nv)->iommus.niso_iommu_present)
/*
* GPU add/remove events
*/
@ -779,8 +788,6 @@ NV_STATUS NV_API_CALL nv_register_phys_pages (nv_state_t *, NvU64 *, NvU64,
void NV_API_CALL nv_unregister_phys_pages (nv_state_t *, void *);
NV_STATUS NV_API_CALL nv_dma_map_sgt (nv_dma_device_t *, NvU64, NvU64 *, NvU32, void **);
NV_STATUS NV_API_CALL nv_dma_map_pages (nv_dma_device_t *, NvU64, NvU64 *, NvBool, NvU32, void **);
NV_STATUS NV_API_CALL nv_dma_unmap_pages (nv_dma_device_t *, NvU64, NvU64 *, void **);
NV_STATUS NV_API_CALL nv_dma_map_alloc (nv_dma_device_t *, NvU64, NvU64 *, NvBool, void **);
NV_STATUS NV_API_CALL nv_dma_unmap_alloc (nv_dma_device_t *, NvU64, NvU64 *, void **);
@ -830,7 +837,7 @@ void NV_API_CALL nv_put_firmware(const void *);
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
void NV_API_CALL nv_put_file_private(void *);
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_egm_info(nv_state_t *, NvU64 *, NvU64 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
@ -877,9 +884,9 @@ struct drm_gem_object;
NV_STATUS NV_API_CALL nv_dma_import_sgt (nv_dma_device_t *, struct sg_table *, struct drm_gem_object *);
void NV_API_CALL nv_dma_release_sgt(struct sg_table *, struct drm_gem_object *);
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvU32 *, void **, struct sg_table **, nv_dma_buf_t **);
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvU32 *, void **, struct sg_table **, nv_dma_buf_t **);
void NV_API_CALL nv_dma_release_dma_buf (void *, nv_dma_buf_t *);
NV_STATUS NV_API_CALL nv_dma_import_dma_buf (nv_dma_device_t *, struct dma_buf *, NvU32 *, struct sg_table **, nv_dma_buf_t **);
NV_STATUS NV_API_CALL nv_dma_import_from_fd (nv_dma_device_t *, NvS32, NvU32 *, struct sg_table **, nv_dma_buf_t **);
void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
@ -895,6 +902,8 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
void NV_API_CALL nv_get_disp_smmu_stream_ids (nv_state_t *, NvU32 *, NvU32 *);
/*
* ---------------------------------------------------------------------------
*
@ -921,6 +930,7 @@ NV_STATUS NV_API_CALL rm_ioctl (nvidia_stack_t *, nv_state_t *
NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, NvU32 *);
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@ -940,6 +950,7 @@ void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *
char* NV_API_CALL rm_remove_spaces (const char *);
char* NV_API_CALL rm_string_token (char **, const char);
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
NV_STATUS NV_API_CALL rm_get_adapter_status_external(nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);

View File

@ -62,10 +62,10 @@ typedef struct
/*******************************************************************************
nvUvmInterfaceRegisterGpu
Registers the GPU with the provided UUID for use. A GPU must be registered
before its UUID can be used with any other API. This call is ref-counted so
every nvUvmInterfaceRegisterGpu must be paired with a corresponding
nvUvmInterfaceUnregisterGpu.
Registers the GPU with the provided physical UUID for use. A GPU must be
registered before its UUID can be used with any other API. This call is
ref-counted so every nvUvmInterfaceRegisterGpu must be paired with a
corresponding nvUvmInterfaceUnregisterGpu.
You don't need to call nvUvmInterfaceSessionCreate before calling this.
@ -79,12 +79,13 @@ NV_STATUS nvUvmInterfaceRegisterGpu(const NvProcessorUuid *gpuUuid, UvmGpuPlatfo
/*******************************************************************************
nvUvmInterfaceUnregisterGpu
Unregisters the GPU with the provided UUID. This drops the ref count from
nvUvmInterfaceRegisterGpu. Once the reference count goes to 0 the device may
no longer be accessible until the next nvUvmInterfaceRegisterGpu call. No
automatic resource freeing is performed, so only make the last unregister
call after destroying all your allocations associated with that UUID (such
as those from nvUvmInterfaceAddressSpaceCreate).
Unregisters the GPU with the provided physical UUID. This drops the ref
count from nvUvmInterfaceRegisterGpu. Once the reference count goes to 0
the device may no longer be accessible until the next
nvUvmInterfaceRegisterGpu call. No automatic resource freeing is performed,
so only make the last unregister call after destroying all your allocations
associated with that UUID (such as those from
nvUvmInterfaceAddressSpaceCreate).
If the UUID is not found, no operation is performed.
*/
@ -121,10 +122,10 @@ NV_STATUS nvUvmInterfaceSessionDestroy(uvmGpuSessionHandle session);
nvUvmInterfaceDeviceCreate
Creates a device object under the given session for the GPU with the given
UUID. Also creates a partition object for the device iff bCreateSmcPartition
is true and pGpuInfo->smcEnabled is true. pGpuInfo->smcUserClientInfo will
be used to determine the SMC partition in this case. A device handle is
returned in the device output parameter.
physical UUID. Also creates a partition object for the device iff
bCreateSmcPartition is true and pGpuInfo->smcEnabled is true.
pGpuInfo->smcUserClientInfo will be used to determine the SMC partition in
this case. A device handle is returned in the device output parameter.
Error codes:
NV_ERR_GENERIC
@ -161,6 +162,7 @@ void nvUvmInterfaceDeviceDestroy(uvmGpuDeviceHandle device);
NV_STATUS nvUvmInterfaceAddressSpaceCreate(uvmGpuDeviceHandle device,
unsigned long long vaBase,
unsigned long long vaSize,
NvBool enableAts,
uvmGpuAddressSpaceHandle *vaSpace,
UvmGpuAddressSpaceInfo *vaSpaceInfo);
@ -422,33 +424,6 @@ NV_STATUS nvUvmInterfacePmaPinPages(void *pPma,
NvU64 pageSize,
NvU32 flags);
/*******************************************************************************
nvUvmInterfacePmaUnpinPages
This function will unpin the physical memory allocated using PMA. The pages
passed as input must be already pinned, else this function will return an
error and rollback any change if any page is not previously marked "pinned".
Behaviour is undefined if any blacklisted pages are unpinned.
Arguments:
pPma[IN] - Pointer to PMA object.
pPages[IN] - Array of pointers, containing the PA base
address of each page to be unpinned.
pageCount [IN] - Number of pages required to be unpinned.
pageSize [IN] - Page size of each page to be unpinned.
Error codes:
NV_ERR_INVALID_ARGUMENT - Invalid input arguments.
NV_ERR_GENERIC - Unexpected error. We try hard to avoid
returning this error code as is not very
informative.
NV_ERR_NOT_SUPPORTED - Operation not supported on broken FB
*/
NV_STATUS nvUvmInterfacePmaUnpinPages(void *pPma,
NvU64 *pPages,
NvLength pageCount,
NvU64 pageSize);
/*******************************************************************************
nvUvmInterfaceMemoryFree
@ -638,6 +613,8 @@ NV_STATUS nvUvmInterfaceQueryCopyEnginesCaps(uvmGpuDeviceHandle device,
nvUvmInterfaceGetGpuInfo
Return various gpu info, refer to the UvmGpuInfo struct for details.
The input UUID is for the physical GPU and the pGpuClientInfo identifies
the SMC partition if SMC is enabled and the partition exists.
If no gpu matching the uuid is found, an error will be returned.
On Ampere+ GPUs, pGpuClientInfo contains SMC information provided by the
@ -645,6 +622,9 @@ NV_STATUS nvUvmInterfaceQueryCopyEnginesCaps(uvmGpuDeviceHandle device,
Error codes:
NV_ERR_GENERIC
NV_ERR_NO_MEMORY
NV_ERR_GPU_UUID_NOT_FOUND
NV_ERR_INSUFFICIENT_PERMISSIONS
NV_ERR_INSUFFICIENT_RESOURCES
*/
NV_STATUS nvUvmInterfaceGetGpuInfo(const NvProcessorUuid *gpuUuid,
@ -857,7 +837,7 @@ NV_STATUS nvUvmInterfaceGetEccInfo(uvmGpuDeviceHandle device,
UVM GPU UNLOCK
Arguments:
gpuUuid[IN] - UUID of the GPU to operate on
device[IN] - Device handle associated with the gpu
bOwnInterrupts - Set to NV_TRUE for UVM to take ownership of the
replayable page fault interrupts. Set to NV_FALSE
to return ownership of the page fault interrupts
@ -973,6 +953,7 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
NOTES:
- This function DOES NOT acquire the RM API or GPU locks. That is because
it is called during fault servicing, which could produce deadlocks.
- This function should not be called when interrupts are disabled.
Arguments:
device[IN] - Device handle associated with the gpu
@ -982,6 +963,27 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
/*******************************************************************************
nvUvmInterfaceTogglePrefetchFaults
This function sends an RPC to GSP in order to toggle the prefetch fault PRI.
NOTES:
- This function DOES NOT acquire the RM API or GPU locks. That is because
it is called during fault servicing, which could produce deadlocks.
- This function should not be called when interrupts are disabled.
Arguments:
pFaultInfo[IN] - Information provided by RM for fault handling.
Used for obtaining the device handle without locks.
bEnable[IN] - Instructs RM whether to toggle generating faults on
prefetch on/off.
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
@ -1087,7 +1089,8 @@ void nvUvmInterfaceDeRegisterUvmOps(void);
Error codes:
NV_ERR_INVALID_ARGUMENT
NV_ERR_OBJECT_NOT_FOUND : If device object associated with the uuids aren't found.
NV_ERR_OBJECT_NOT_FOUND : If device object associated with the device
handles isn't found.
*/
NV_STATUS nvUvmInterfaceP2pObjectCreate(uvmGpuDeviceHandle device1,
uvmGpuDeviceHandle device2,
@ -1140,6 +1143,8 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup
which has not yet completed. It is expected that the caller
will reattempt the call until a different code is returned.
As an example, multi-node systems which require querying
PTEs from the Fabric Manager may return this code.
*/
NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
NvHandle hMemory,
@ -1449,18 +1454,7 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
NvU32 methodStreamSize);
/*******************************************************************************
CSL Interface and Locking
The following functions do not acquire the RM API or GPU locks and must not be called
concurrently with the same UvmCslContext parameter in different threads. The caller must
guarantee this exclusion.
* nvUvmInterfaceCslRotateIv
* nvUvmInterfaceCslEncrypt
* nvUvmInterfaceCslDecrypt
* nvUvmInterfaceCslSign
* nvUvmInterfaceCslQueryMessagePool
* nvUvmInterfaceCslIncrementIv
Cryptography Services Library (CSL) Interface
*/
/*******************************************************************************
@ -1471,8 +1465,11 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
The lifetime of the context is the same as the lifetime of the secure channel
it is paired with.
Locking: This function acquires an API lock.
Memory : This function dynamically allocates memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
channel[IN] - Handle to a secure channel.
Error codes:
@ -1490,11 +1487,33 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
If context is already deinitialized then function returns immediately.
Locking: This function does not acquire an API or GPU lock.
Memory : This function may free memory.
Arguments:
uvmCslContext[IN] - The CSL context.
uvmCslContext[IN] - The CSL context associated with a channel.
*/
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslUpdateContext
Updates a context after a key rotation event and can only be called once per
key rotation event. Following a key rotation event, and before
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
Locking: This function acquires an API lock.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN] - The CSL context associated with a channel.
Error codes:
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslRotateIv
@ -1509,11 +1528,13 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function attempts to acquire the GPU lock.
In case of failure to acquire the return code
is NV_ERR_STATE_IN_USE.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
@ -1521,7 +1542,11 @@ Arguments:
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
to overflow.
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
can retry at a later time.
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
when attempting to acquire a lock.
*/
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation);
@ -1538,11 +1563,13 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
However, it is optional. If it is NULL, the next IV in line will be used.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
bufferSize[IN] - Size of the input and output buffers in
units of bytes. Value can range from 1 byte
to (2^32) - 1 bytes.
@ -1553,7 +1580,8 @@ Arguments:
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
Error codes:
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
- The size of the data is 0 bytes.
- The encryptIv has already been used.
*/
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
@ -1573,8 +1601,10 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
@ -1616,11 +1646,13 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
Auth and input buffers must not overlap. If they do then calling this function produces
undefined behavior.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
uvmCslContext[IN/OUT] - The CSL context associated with a channel.
bufferSize[IN] - Size of the input buffer in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
inputBuffer[IN] - Address of plaintext input buffer.
@ -1629,7 +1661,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The signing operation would cause a counter overflow to occur.
NV_ERR_INVALID_ARGUMENT - The size of the data is 0 bytes.
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
- The size of the data is 0 bytes.
*/
NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
@ -1641,8 +1674,10 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
Returns the number of messages that can be encrypted before the message counter will overflow.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
@ -1666,8 +1701,10 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
the returned IV can be used in nvUvmInterfaceCslDecrypt.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Memory : This function does not dynamically allocate memory.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
@ -1675,7 +1712,7 @@ Arguments:
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
increment[IN] - The amount by which the IV is incremented. Can be 0.
iv[out] - If non-NULL, a buffer to store the incremented IV.
iv[OUT] - If non-NULL, a buffer to store the incremented IV.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
@ -1687,4 +1724,29 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
NvU64 increment,
UvmCslIv *iv);
/*******************************************************************************
nvUvmInterfaceCslLogExternalEncryption
Checks and logs information about non-CSL encryptions, such as those that
originate from the GPU.
This function does not modify elements of the UvmCslContext.
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[OUT] - The size of the buffer encrypted by the
external entity in units of bytes.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize);
#endif // _NV_UVM_INTERFACE_H_

View File

@ -131,6 +131,8 @@ typedef struct UvmGpuMemoryInfo_tag
// This is only valid if deviceDescendant is NV_TRUE.
// When egm is NV_TRUE, this is also the UUID of the GPU
// for which EGM is local.
// If the GPU has SMC enabled, the UUID is the GI UUID.
// Otherwise, it is the UUID for the physical GPU.
// Note: If the allocation is owned by a device in
// an SLI group and the allocation is broadcast
// across the SLI group, this UUID will be any one
@ -544,6 +546,10 @@ typedef struct UvmGpuP2PCapsParams_tag
// the GPUs are direct peers.
NvU32 peerIds[2];
// Out: peerId[i] contains gpu[i]'s EGM peer id of gpu[1 - i]. Only defined
// if the GPUs are direct peers and EGM enabled in the system.
NvU32 egmPeerIds[2];
// Out: UVM_LINK_TYPE
NvU32 p2pLink;
@ -572,8 +578,11 @@ typedef struct UvmPlatformInfo_tag
// Out: ATS (Address Translation Services) is supported
NvBool atsSupported;
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
NvBool sevEnabled;
// Out: True if HW trusted execution, such as AMD's SEV-SNP or Intel's TDX,
// is enabled in the VM, indicating that Confidential Computing must be
// also enabled in the GPU(s); these two security features are either both
// enabled, or both disabled.
NvBool confComputingEnabled;
} UvmPlatformInfo;
typedef struct UvmGpuClientInfo_tag
@ -604,7 +613,8 @@ typedef struct UvmGpuInfo_tag
// Printable gpu name
char name[UVM_GPU_NAME_LENGTH];
// Uuid of this gpu
// Uuid of the physical GPU or GI UUID if nvUvmInterfaceGetGpuInfo()
// requested information for a valid SMC partition.
NvProcessorUuid uuid;
// Gpu architecture; NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_*
@ -688,8 +698,12 @@ typedef struct UvmGpuInfo_tag
NvU64 nvswitchMemoryWindowStart;
// local EGM properties
// NV_TRUE if EGM is enabled
NvBool egmEnabled;
// Peer ID to reach local EGM when EGM is enabled
NvU8 egmPeerId;
// EGM base address to offset in the GMMU PTE entry for EGM mappings
NvU64 egmBaseAddr;
} UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag
@ -778,14 +792,14 @@ typedef NV_STATUS (*uvmEventResume_t) (void);
/*******************************************************************************
uvmEventStartDevice
This function will be called by the GPU driver once it has finished its
initialization to tell the UVM driver that this GPU has come up.
initialization to tell the UVM driver that this physical GPU has come up.
*/
typedef NV_STATUS (*uvmEventStartDevice_t) (const NvProcessorUuid *pGpuUuidStruct);
/*******************************************************************************
uvmEventStopDevice
This function will be called by the GPU driver to let UVM know that a GPU
is going down.
This function will be called by the GPU driver to let UVM know that a
physical GPU is going down.
*/
typedef NV_STATUS (*uvmEventStopDevice_t) (const NvProcessorUuid *pGpuUuidStruct);
@ -816,7 +830,7 @@ typedef NV_STATUS (*uvmEventServiceInterrupt_t) (void *pDeviceObject,
/*******************************************************************************
uvmEventIsrTopHalf_t
This function will be called by the GPU driver to let UVM know
that an interrupt has occurred.
that an interrupt has occurred on the given physical GPU.
Returns:
NV_OK if the UVM driver handled the interrupt
@ -923,11 +937,6 @@ typedef struct UvmGpuFaultInfo_tag
// CSL context used for performing decryption of replayable faults when
// Confidential Computing is enabled.
UvmCslContext cslCtx;
// Indicates whether UVM owns the replayable fault buffer.
// The value of this field is always NV_TRUE When Confidential Computing
// is disabled.
NvBool bUvmOwnsHwFaultBuffer;
} replayable;
struct
{

View File

@ -58,6 +58,7 @@ typedef NvU32 NvKmsFrameLockHandle;
typedef NvU32 NvKmsDeferredRequestFifoHandle;
typedef NvU32 NvKmsSwapGroupHandle;
typedef NvU32 NvKmsVblankSyncObjectHandle;
typedef NvU32 NvKmsVblankSemControlHandle;
struct NvKmsSize {
NvU16 width;

View File

@ -490,6 +490,8 @@ typedef enum NvKmsKapiRegisterWaiterResultRec {
NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED,
} NvKmsKapiRegisterWaiterResult;
typedef void NvKmsKapiSuspendResumeCallbackFunc(NvBool suspend);
struct NvKmsKapiFunctionsTable {
/*!
@ -1399,6 +1401,15 @@ struct NvKmsKapiFunctionsTable {
NvU64 index,
NvU64 new_value
);
/*!
* Set the callback function for suspending and resuming the display system.
*/
void
(*setSuspendResumeCallback)
(
NvKmsKapiSuspendResumeCallbackFunc *function
);
};
/** @} */

View File

@ -919,6 +919,9 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }
// Get the number of elements the specified fixed-size array
#define NV_ARRAY_ELEMENTS(x) ((sizeof(x)/sizeof((x)[0])))
#ifdef __cplusplus
}
#endif //__cplusplus

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2014-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -150,6 +150,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Confi
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
NV_STATUS_CODE(NV_ERR_ALREADY_SIGNALLED, 0x0000007B, "Semaphore Surface value already >= requested wait value")
NV_STATUS_CODE(NV_ERR_QUEUE_TASK_SLOT_NOT_AVAILABLE, 0x0000007C, "PMU RPC error due to no queue slot available for this event")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

View File

@ -145,7 +145,12 @@ typedef signed short NvS16; /* -32768 to 32767 */
#endif
// Macro to build an NvU32 from four bytes, listed from msb to lsb
#define NvU32_BUILD(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
#define NvU32_BUILD(a, b, c, d) \
((NvU32)( \
(((NvU32)(a) & 0xff) << 24) | \
(((NvU32)(b) & 0xff) << 16) | \
(((NvU32)(c) & 0xff) << 8) | \
(((NvU32)(d) & 0xff))))
#if NVTYPES_USE_STDINT
typedef uint32_t NvV32; /* "void": enumerated or multiple fields */

View File

@ -67,7 +67,6 @@ typedef struct os_wait_queue os_wait_queue;
* ---------------------------------------------------------------------------
*/
NvU64 NV_API_CALL os_get_num_phys_pages (void);
NV_STATUS NV_API_CALL os_alloc_mem (void **, NvU64);
void NV_API_CALL os_free_mem (void *);
NV_STATUS NV_API_CALL os_get_current_time (NvU32 *, NvU32 *);
@ -105,7 +104,6 @@ void* NV_API_CALL os_map_kernel_space (NvU64, NvU64, NvU32);
void NV_API_CALL os_unmap_kernel_space (void *, NvU64);
void* NV_API_CALL os_map_user_space (NvU64, NvU64, NvU32, NvU32, void **);
void NV_API_CALL os_unmap_user_space (void *, NvU64, void *);
NV_STATUS NV_API_CALL os_flush_cpu_cache (void);
NV_STATUS NV_API_CALL os_flush_cpu_cache_all (void);
NV_STATUS NV_API_CALL os_flush_user_cache (void);
void NV_API_CALL os_flush_cpu_write_combine_buffer(void);
@ -230,14 +228,12 @@ extern NvBool os_dma_buf_enabled;
* ---------------------------------------------------------------------------
*/
#define NV_DBG_INFO 0x1
#define NV_DBG_SETUP 0x2
#define NV_DBG_INFO 0x0
#define NV_DBG_SETUP 0x1
#define NV_DBG_USERERRORS 0x2
#define NV_DBG_WARNINGS 0x3
#define NV_DBG_ERRORS 0x4
#define NV_DBG_HW_ERRORS 0x5
#define NV_DBG_FATAL 0x6
#define NV_DBG_FORCE_LEVEL(level) ((level) | (1 << 8))
void NV_API_CALL out_string(const char *str);
int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);

View File

@ -37,7 +37,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_create_session (nvidia_stack_t *, nvgpuSessio
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_session (nvidia_stack_t *, nvgpuSessionHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_device_create (nvidia_stack_t *, nvgpuSessionHandle_t, const nvgpuInfo_t *, const NvProcessorUuid *, nvgpuDeviceHandle_t *, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_device_destroy (nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_create(nvidia_stack_t *, nvgpuDeviceHandle_t, unsigned long long, unsigned long long, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_create(nvidia_stack_t *, nvgpuDeviceHandle_t, unsigned long long, unsigned long long, NvBool, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_dup_address_space(nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, nvgpuAddressSpaceHandle_t *, nvgpuAddressSpaceInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_address_space_destroy(nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_memory_alloc_fb(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvLength, NvU64 *, nvgpuAllocInfo_t);
@ -45,7 +45,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_memory_alloc_fb(nvidia_stack_t *, nvgpuAddres
NV_STATUS NV_API_CALL rm_gpu_ops_pma_alloc_pages(nvidia_stack_t *, void *, NvLength, NvU32 , nvgpuPmaAllocationOptions_t, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_pma_free_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_pma_pin_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_pma_unpin_pages(nvidia_stack_t *, void *, NvU64 *, NvLength , NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_get_pma_object(nvidia_stack_t *, nvgpuDeviceHandle_t, void **, const nvgpuPmaStatistics_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_pma_register_callbacks(nvidia_stack_t *sp, void *, nvPmaEvictPagesCallback, nvPmaEvictRangeCallback, void *);
void NV_API_CALL rm_gpu_ops_pma_unregister_callbacks(nvidia_stack_t *sp, void *);
@ -77,6 +76,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceH
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
@ -103,6 +103,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_update(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
@ -110,5 +111,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslCont
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU32);
#endif

View File

@ -14,6 +14,13 @@ OUTPUT=$4
XEN_PRESENT=1
PREEMPT_RT_PRESENT=0
# We also use conftest.sh on FreeBSD to check for which symbols are provided
# by the linux kernel programming interface (linuxkpi) when compiling nvidia-drm.ko
OS_FREEBSD=0
if [ "$OS" = "FreeBSD" ] ; then
OS_FREEBSD=1
fi
# VGX_BUILD parameter defined only for VGX builds (vGPU Host driver)
# VGX_KVM_BUILD parameter defined only vGPU builds on KVM hypervisor
# GRID_BUILD parameter defined only for GRID builds (GRID Guest driver)
@ -205,11 +212,6 @@ CONFTEST_PREAMBLE="#include \"conftest/headers.h\"
#if defined(NV_LINUX_KCONFIG_H_PRESENT)
#include <linux/kconfig.h>
#endif
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
#include <generated/autoconf.h>
#else
#include <linux/autoconf.h>
#endif
#if defined(CONFIG_XEN) && \
defined(CONFIG_XEN_INTERFACE_VERSION) && !defined(__XEN_INTERFACE_VERSION__)
#define __XEN_INTERFACE_VERSION__ CONFIG_XEN_INTERFACE_VERSION
@ -222,6 +224,17 @@ CONFTEST_PREAMBLE="#include \"conftest/headers.h\"
#endif
#endif"
# FreeBSD's Linux compatibility does not have autoconf.h defined
# anywhere yet, only add this part on Linux
if [ ${OS_FREEBSD} -ne 1 ] ; then
CONFTEST_PREAMBLE="${CONFTEST_PREAMBLE}
#if defined(NV_GENERATED_AUTOCONF_H_PRESENT)
#include <generated/autoconf.h>
#else
#include <linux/autoconf.h>
#endif"
fi
test_configuration_option() {
#
# Check to see if the given configuration option is defined
@ -308,16 +321,57 @@ compile_check_conftest() {
fi
}
export_symbol_present_conftest() {
#
# Check Module.symvers to see whether the given symbol is present.
#
check_symbol_exists() {
# Check that the given symbol is available
SYMBOL="$1"
TAB=' '
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL\(_GPL\)\?\s*\$" \
if [ ${OS_FREEBSD} -ne 1 ] ; then
# Linux:
# ------
#
# Check Module.symvers to see whether the given symbol is present.
#
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL.*\$" \
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
return 0
fi
else
# FreeBSD:
# ------
#
# Check if any of the linuxkpi or drm kernel module files contain
# references to this symbol.
# Get the /boot/kernel/ and /boot/modules paths, convert the list to a
# space separated list instead of semicolon separated so we can iterate
# over it.
if [ -z "${CONFTEST_BSD_KMODPATHS}" ] ; then
KMODPATHS=`sysctl -n kern.module_path | sed -e "s/;/ /g"`
else
KMODPATHS="${CONFTEST_BSD_KMODPATHS}"
fi
for KMOD in linuxkpi.ko linuxkpi_gplv2.ko drm.ko dmabuf.ko ; do
for KMODPATH in $KMODPATHS; do
if [ -e "$KMODPATH/$KMOD" ] ; then
if nm "$KMODPATH/$KMOD" | grep "$SYMBOL" >/dev/null 2>&1 ; then
return 0
fi
fi
done
done
fi
return 1
}
export_symbol_present_conftest() {
SYMBOL="$1"
if check_symbol_exists $SYMBOL; then
echo "#define NV_IS_EXPORT_SYMBOL_PRESENT_$SYMBOL 1" |
append_conftest "symbols"
else
@ -1206,6 +1260,36 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_BIND_IOMMUFD" "" "types"
;;
vfio_device_ops_has_detach_ioas)
#
# Determine if 'vfio_device_ops' struct has 'detach_ioas' field.
#
# Added by commit 9048c7341c4df9cae04c154a8b0f556dbe913358 ("vfio-iommufd: Add detach_ioas
# support for physical VFIO devices
#
CODE="
#include <linux/pci.h>
#include <linux/vfio.h>
int conftest_vfio_device_ops_has_detach_ioas(void) {
return offsetof(struct vfio_device_ops, detach_ioas);
}"
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_DETACH_IOAS" "" "types"
;;
pfn_address_space)
#
# Determine if 'struct pfn_address_space' structure is present or not.
#
CODE="
#include <linux/memory-failure.h>
void conftest_pfn_address_space() {
struct pfn_address_space pfn_address_space;
}"
compile_check_conftest "$CODE" "NV_PFN_ADDRESS_SPACE_STRUCT_PRESENT" "" "types"
;;
pci_irq_vector_helpers)
#
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
@ -1343,7 +1427,7 @@ compile_test() {
#include <drm/drm_drv.h>
#endif
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE)
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
@ -1807,7 +1891,7 @@ compile_test() {
#include <drm/drmP.h>
#endif
#include <drm/drm_atomic.h>
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE)
#if !defined(CONFIG_DRM) && !defined(CONFIG_DRM_MODULE) && !defined(__FreeBSD__)
#error DRM not enabled
#endif
void conftest_drm_atomic_modeset_available(void) {
@ -5203,10 +5287,16 @@ compile_test() {
# Added by commit 7b7b27214bba ("mm/memory_hotplug: introduce
# add_memory_driver_managed()") in v5.8.
#
# Before commit 3a0aaefe4134 ("mm/memory_hotplug: guard more
# declarations by CONFIG_MEMORY_HOTPLUG") in v5.10, the
# add_memory_driver_managed() was not guarded.
#
CODE="
#include <linux/memory_hotplug.h>
void conftest_add_memory_driver_managed() {
#if defined(CONFIG_MEMORY_HOTPLUG)
add_memory_driver_managed();
#endif
}"
compile_check_conftest "$CODE" "NV_ADD_MEMORY_DRIVER_MANAGED_PRESENT" "" "functions"
@ -5669,22 +5759,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
;;
migrate_vma_setup)
#
# Determine if migrate_vma_setup() function is present
#
# Added by commit a7d1f22bb74f ("mm: turn migrate_vma upside
# down") in v5.4.
#
CODE="
#include <linux/migrate.h>
int conftest_migrate_vma_setup(void) {
migrate_vma_setup();
}"
compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
;;
migrate_vma_added_flags)
#
# Determine if migrate_vma structure has flags
@ -5795,6 +5869,24 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
;;
iommu_is_dma_domain)
#
# Determine if the iommu_is_dma_domain() function is present.
# Its presence is also taken to imply that the
# iommu_get_domain_for_dev() function is present, so no
# separate conftest is run for the latter.
#
# Added by commit bf3aed4660c6 ("iommu: Introduce explicit type
# for non-strict DMA domains") in v5.15
#
CODE="
#include <linux/iommu.h>
void conftest_iommu_is_dma_domain(void) {
iommu_is_dma_domain();
}"

compile_check_conftest "$CODE" "NV_IOMMU_IS_DMA_DOMAIN_PRESENT" "" "functions"
;;
drm_crtc_state_has_no_vblank)
#
# Determine if the 'drm_crtc_state' structure has 'no_vblank'.
@ -6483,6 +6575,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FIND_NEXT_BIT_WRAP_PRESENT" "" "functions"
;;
crypto_tfm_ctx_aligned)
# Determine if 'crypto_tfm_ctx_aligned' is defined.
#
# Removed by commit 25c74a39e0f6 ("crypto: hmac - remove unnecessary
# alignment logic") in v6.7.
#
CODE="
#include <crypto/algapi.h>
void conftest_crypto_tfm_ctx_aligned(void) {
(void)crypto_tfm_ctx_aligned();
}"
compile_check_conftest "$CODE" "NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT" "" "functions"
;;
crypto)
#
# Determine if we support various crypto functions.
@ -6604,9 +6711,9 @@ compile_test() {
# 'supported_colorspaces' argument.
#
# The 'u32 supported_colorspaces' argument was added to
# drm_mode_create_dp_colorspace_property() by linux-next commit
# drm_mode_create_dp_colorspace_property() by commit
# c265f340eaa8 ("drm/connector: Allow drivers to pass list of
# supported colorspaces").
# supported colorspaces") in v6.5.
#
# To test if drm_mode_create_dp_colorspace_property() has the
# 'supported_colorspaces' argument, declare a function prototype
@ -6634,6 +6741,27 @@ compile_test() {
compile_check_conftest "$CODE" "NV_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_HAS_SUPPORTED_COLORSPACES_ARG" "" "types"
;;
drm_unlocked_ioctl_flag_present)
# Determine if DRM_UNLOCKED IOCTL flag is present.
#
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
# next-20231208.
#
# DRM_UNLOCKED definition was moved from drmP.h to drm_ioctl.h by
# commit 2640981f3600 ("drm: document drm_ioctl.[hc]") in v4.12.
CODE="
#if defined(NV_DRM_DRM_IOCTL_H_PRESENT)
#include <drm/drm_ioctl.h>
#endif
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
int flags = DRM_UNLOCKED;"
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit. Please
# avoid specifying -rc kernels, and only use SHAs that actually exist
@ -6935,10 +7063,12 @@ case "$5" in
#
VERBOSE=$6
iommu=CONFIG_VFIO_IOMMU_TYPE1
iommufd_vfio_container=CONFIG_IOMMUFD_VFIO_CONTAINER
mdev=CONFIG_VFIO_MDEV
kvm=CONFIG_KVM_VFIO
vfio_pci_core=CONFIG_VFIO_PCI_CORE
VFIO_IOMMU_PRESENT=0
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=0
VFIO_MDEV_PRESENT=0
KVM_PRESENT=0
VFIO_PCI_CORE_PRESENT=0
@ -6948,6 +7078,10 @@ case "$5" in
VFIO_IOMMU_PRESENT=1
fi
if (test_configuration_option ${iommufd_vfio_container} || test_configuration_option ${iommufd_vfio_container}_MODULE); then
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=1
fi
if (test_configuration_option ${mdev} || test_configuration_option ${mdev}_MODULE); then
VFIO_MDEV_PRESENT=1
fi
@ -6960,36 +7094,23 @@ case "$5" in
VFIO_PCI_CORE_PRESENT=1
fi
# When this sanity check is run via nvidia-installer, it sets ARCH as aarch64.
# But, when it is run via Kbuild, ARCH is set as arm64
if [ "$ARCH" = "aarch64" ]; then
ARCH="arm64"
fi
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
# On x86_64, vGPU requires MDEV framework to be present.
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
if ([ "$VFIO_IOMMU_PRESENT" != "0" ] || [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" != "0" ])&& [ "$KVM_PRESENT" != "0" ] ; then
# vGPU requires either MDEV or vfio-pci-core framework to be present.
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
exit 0
fi
fi
echo "Below CONFIG options are missing on the kernel for installing";
echo "NVIDIA vGPU driver on KVM host";
if [ "$VFIO_IOMMU_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_IOMMU_TYPE1";
if [ "$VFIO_IOMMU_PRESENT" = "0" ] && [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_IOMMU_TYPE1 or CONFIG_IOMMUFD_VFIO_CONTAINER";
fi
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
fi
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_MDEV";
fi
if [ "$KVM_PRESENT" = "0" ]; then
echo "CONFIG_KVM";
fi

View File

@ -0,0 +1,100 @@
# Each of these headers is checked for presence with a test #include; a
# corresponding #define will be generated in conftest/headers.h.
NV_HEADER_PRESENCE_TESTS = \
asm/system.h \
drm/drmP.h \
drm/drm_aperture.h \
drm/drm_auth.h \
drm/drm_gem.h \
drm/drm_crtc.h \
drm/drm_color_mgmt.h \
drm/drm_atomic.h \
drm/drm_atomic_helper.h \
drm/drm_atomic_state_helper.h \
drm/drm_encoder.h \
drm/drm_atomic_uapi.h \
drm/drm_drv.h \
drm/drm_fbdev_generic.h \
drm/drm_framebuffer.h \
drm/drm_connector.h \
drm/drm_probe_helper.h \
drm/drm_blend.h \
drm/drm_fourcc.h \
drm/drm_prime.h \
drm/drm_plane.h \
drm/drm_vblank.h \
drm/drm_file.h \
drm/drm_ioctl.h \
drm/drm_device.h \
drm/drm_mode_config.h \
drm/drm_modeset_lock.h \
dt-bindings/interconnect/tegra_icc_id.h \
generated/autoconf.h \
generated/compile.h \
generated/utsrelease.h \
linux/efi.h \
linux/kconfig.h \
linux/platform/tegra/mc_utils.h \
linux/printk.h \
linux/ratelimit.h \
linux/prio_tree.h \
linux/log2.h \
linux/of.h \
linux/bug.h \
linux/sched.h \
linux/sched/mm.h \
linux/sched/signal.h \
linux/sched/task.h \
linux/sched/task_stack.h \
xen/ioemu.h \
linux/fence.h \
linux/dma-fence.h \
linux/dma-resv.h \
soc/tegra/chip-id.h \
soc/tegra/fuse.h \
soc/tegra/tegra_bpmp.h \
video/nv_internal.h \
linux/platform/tegra/dce/dce-client-ipc.h \
linux/nvhost.h \
linux/nvhost_t194.h \
linux/host1x-next.h \
asm/book3s/64/hash-64k.h \
asm/set_memory.h \
asm/prom.h \
asm/powernv.h \
linux/atomic.h \
asm/barrier.h \
asm/opal-api.h \
sound/hdaudio.h \
asm/pgtable_types.h \
asm/page.h \
linux/stringhash.h \
linux/dma-map-ops.h \
rdma/peer_mem.h \
sound/hda_codec.h \
linux/dma-buf.h \
linux/time.h \
linux/platform_device.h \
linux/mutex.h \
linux/reset.h \
linux/of_platform.h \
linux/of_device.h \
linux/of_gpio.h \
linux/gpio.h \
linux/gpio/consumer.h \
linux/interconnect.h \
linux/pm_runtime.h \
linux/clk.h \
linux/clk-provider.h \
linux/ioasid.h \
linux/stdarg.h \
linux/iosys-map.h \
asm/coco.h \
linux/vfio_pci_core.h \
linux/mdev.h \
soc/tegra/bpmp-abi.h \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h

View File

@ -25,6 +25,15 @@
#include <linux/module.h>
#include "nv-pci-table.h"
#include "cpuopsys.h"
#if defined(NV_BSD)
/* Define PCI classes that FreeBSD's linuxkpi is missing */
#define PCI_VENDOR_ID_NVIDIA 0x10de
#define PCI_CLASS_DISPLAY_VGA 0x0300
#define PCI_CLASS_DISPLAY_3D 0x0302
#define PCI_CLASS_BRIDGE_OTHER 0x0680
#endif
/* Devices supported by RM */
struct pci_device_id nv_pci_table[] = {
@ -48,7 +57,7 @@ struct pci_device_id nv_pci_table[] = {
};
/* Devices supported by all drivers in nvidia.ko */
struct pci_device_id nv_module_device_table[] = {
struct pci_device_id nv_module_device_table[4] = {
{
.vendor = PCI_VENDOR_ID_NVIDIA,
.device = PCI_ANY_ID,
@ -76,4 +85,6 @@ struct pci_device_id nv_module_device_table[] = {
{ }
};
#if defined(NV_LINUX)
MODULE_DEVICE_TABLE(pci, nv_module_device_table);
#endif

View File

@ -27,5 +27,6 @@
#include <linux/pci.h>
extern struct pci_device_id nv_pci_table[];
extern struct pci_device_id nv_module_device_table[4];
#endif /* _NV_PCI_TABLE_H_ */

View File

@ -24,6 +24,7 @@
#define __NVIDIA_DRM_CONFTEST_H__
#include "conftest.h"
#include "nvtypes.h"
/*
* NOTE: This file is expected to get included at the top before including any
@ -72,4 +73,121 @@
#undef NV_DRM_COLOR_MGMT_AVAILABLE
#endif
/*
* Adapt to quirks in FreeBSD's Linux kernel compatibility layer.
*/
#if defined(NV_BSD)
#include <linux/rwsem.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>
/* For nv_drm_gem_prime_force_fence_signal */
#ifndef spin_is_locked
#define spin_is_locked(lock) mtx_owned(lock.m)
#endif
#ifndef rwsem_is_locked
#define rwsem_is_locked(sem) (((sem)->sx.sx_lock & (SX_LOCK_SHARED)) \
|| ((sem)->sx.sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)))
#endif
/*
* FreeBSD does not define vm_flags_t in its linuxkpi, since there is already
* a FreeBSD vm_flags_t (of a different size) and they don't want the names to
* collide. Temporarily redefine it when including nv-mm.h
*/
#define vm_flags_t unsigned long
#include "nv-mm.h"
#undef vm_flags_t
/*
* sys/nv.h and nvidia/nv.h have the same header guard
* we need to clear it for nvlist_t to get loaded
*/
#undef _NV_H_
#include <sys/nv.h>
/*
* For now just use set_page_dirty as the lock variant
* is not ported for FreeBSD. (in progress). This calls
* vm_page_dirty. Used in nv-mm.h
*/
#define set_page_dirty_lock set_page_dirty
/*
* FreeBSD does not implement drm_atomic_state_free, simply
* default to drm_atomic_state_put
*/
#define drm_atomic_state_free drm_atomic_state_put
#if __FreeBSD_version < 1300000
/* redefine LIST_HEAD_INIT to the linux version */
#include <linux/list.h>
#define LIST_HEAD_INIT(name) LINUX_LIST_HEAD_INIT(name)
#endif
/*
* FreeBSD currently has only vmf_insert_pfn_prot defined, and it has a
* static assert warning not to use it since all of DRM's usages are in
* loops with the vm obj lock(s) held. Instead we should use the lkpi
* function itself directly. For us none of this applies so we can just
* wrap it in our own definition of vmf_insert_pfn
*/
#ifndef NV_VMF_INSERT_PFN_PRESENT
#define NV_VMF_INSERT_PFN_PRESENT 1
#if __FreeBSD_version < 1300000
#define VM_SHARED (1 << 17)
/* Not present in 12.2 */
/*
 * Backfill of linuxkpi's vmf_insert_pfn_prot for FreeBSD releases that do
 * not provide it (pre-13, per the surrounding __FreeBSD_version guard).
 * Maps the physical page 'pfn' into the VM object backing 'vma' at the
 * object index corresponding to the faulting address 'addr', with memory
 * attributes derived from 'prot'.
 *
 * Caller must hold the VM object's write lock (asserted below).
 */
static inline vm_fault_t
lkpi_vmf_insert_pfn_prot_locked(struct vm_area_struct *vma, unsigned long addr,
                                unsigned long pfn, pgprot_t prot)
{
    vm_object_t vm_obj = vma->vm_obj;
    vm_page_t page;
    vm_pindex_t pindex;

    VM_OBJECT_ASSERT_WLOCKED(vm_obj);

    /* Object page index of the faulting address within this mapping. */
    pindex = OFF_TO_IDX(addr - vma->vm_start);
    /* Record the first populated index for later teardown bookkeeping. */
    if (vma->vm_pfn_count == 0)
        vma->vm_pfn_first = pindex;
    MPASS(pindex <= OFF_TO_IDX(vma->vm_end));

    /*
     * NOTE(review): assumes vm_page_grab(..., VM_ALLOC_NORMAL) returns the
     * page already resident at pindex, or NULL when none is — confirm the
     * exact semantics on the targeted (pre-13) FreeBSD releases.
     */
    page = vm_page_grab(vm_obj, pindex, VM_ALLOC_NORMAL);
    if (page == NULL) {
        /* Not resident: insert the vm_page that backs 'pfn' ourselves. */
        page = PHYS_TO_VM_PAGE(IDX_TO_OFF(pfn));
        vm_page_xbusy(page);
        if (vm_page_insert(page, vm_obj, pindex)) {
            vm_page_xunbusy(page);
            return (VM_FAULT_OOM);
        }
        page->valid = VM_PAGE_BITS_ALL;
    }
    /* Apply the cache mode implied by the requested page protection. */
    pmap_page_set_memattr(page, pgprot2cachemode(prot));
    vma->vm_pfn_count++;

    return (VM_FAULT_NOPAGE);
}
#endif
/*
 * vmf_insert_pfn - insert the page for 'pfn' into 'vma' at fault address
 * 'addr', using the VMA's own page protection.
 *
 * Locking wrapper: takes the VM object write lock around the locked
 * insertion helper, as required by its WLOCKED assertion.
 */
static inline vm_fault_t
vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
               unsigned long pfn)
{
    vm_fault_t ret;

    VM_OBJECT_WLOCK(vma->vm_obj);
    ret = lkpi_vmf_insert_pfn_prot_locked(vma, addr, pfn, vma->vm_page_prot);
    VM_OBJECT_WUNLOCK(vma->vm_obj);

    return (ret);
}
#endif
#endif /* defined(NV_BSD) */
#endif /* defined(__NVIDIA_DRM_CONFTEST_H__) */

View File

@ -92,11 +92,22 @@ static void nv_drm_plane_destroy(struct drm_plane *plane)
nv_drm_free(nv_plane);
}
/*
 * Reset a layer configuration to its default (disabled) state: every field
 * zeroed, except the color-space conversion matrix which must be identity
 * rather than all-zeroes.
 */
static inline void
plane_config_clear(struct NvKmsKapiLayerConfig *layerConfig)
{
    if (layerConfig != NULL) {
        memset(layerConfig, 0, sizeof(*layerConfig));
        layerConfig->csc = NVKMS_IDENTITY_CSC_MATRIX;
    }
}
static inline void
plane_req_config_disable(struct NvKmsKapiLayerRequestedConfig *req_config)
{
/* Clear layer config */
memset(&req_config->config, 0, sizeof(req_config->config));
plane_config_clear(&req_config->config);
/* Set flags to get cleared layer config applied */
req_config->flags.surfaceChanged = NV_TRUE;
@ -113,6 +124,45 @@ cursor_req_config_disable(struct NvKmsKapiCursorRequestedConfig *req_config)
req_config->flags.surfaceChanged = NV_TRUE;
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
/*
 * Convert a DRM color transform matrix (CTM) into an NvKms CSC matrix.
 *
 * The DRM CTM is a 3x3 matrix of S31.32 sign-magnitude fixed-point values;
 * the NvKms CSC is a 3x4 matrix of two's-complement S15.16 fixed-point
 * values. The extra fourth (offset) column of the CSC is zeroed.
 */
static void color_mgmt_config_ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
                                         struct drm_color_ctm *drm_ctm)
{
    int y;

    /* CTM is a 3x3 matrix while ours is 3x4. Zero out the last column. */
    nvkms_csc->m[0][3] = nvkms_csc->m[1][3] = nvkms_csc->m[2][3] = 0;

    for (y = 0; y < 3; y++) {
        int x;

        for (x = 0; x < 3; x++) {
            /*
             * Values in the CTM are encoded in S31.32 sign-magnitude fixed-
             * point format, while NvKms CSC values are signed 2's-complement
             * S15.16 (Ssign-extend12-3.16?) fixed-point format.
             */
            NvU64 ctmVal = drm_ctm->matrix[y*3 + x];
            NvU64 signBit = ctmVal & (1ULL << 63);
            NvU64 magnitude = ctmVal & ~signBit;

            /*
             * Drop the low 16 bits of the fractional part and the high 17 bits
             * of the integral part. Drop 17 bits to avoid corner cases where
             * the highest resulting bit is a 1, causing the `cscVal = -cscVal`
             * line to result in a positive number.
             */
            NvS32 cscVal = (magnitude >> 16) & ((1ULL << 31) - 1);
            if (signBit) {
                cscVal = -cscVal;
            }

            nvkms_csc->m[y][x] = cscVal;
        }
    }
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
static void
cursor_plane_req_config_update(struct drm_plane *plane,
struct drm_plane_state *plane_state,
@ -239,6 +289,8 @@ plane_req_config_update(struct drm_plane *plane,
.dstY = plane_state->crtc_y,
.dstWidth = plane_state->crtc_w,
.dstHeight = plane_state->crtc_h,
.csc = old_config.csc
},
};
@ -578,6 +630,24 @@ static int nv_drm_plane_atomic_check(struct drm_plane *plane,
return ret;
}
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
if (crtc_state->color_mgmt_changed) {
/*
* According to the comment in the Linux kernel's
* drivers/gpu/drm/drm_color_mgmt.c, if this property is NULL,
* the CTM needs to be changed to the identity matrix
*/
if (crtc_state->ctm) {
color_mgmt_config_ctm_to_csc(&plane_requested_config->config.csc,
(struct drm_color_ctm *)crtc_state->ctm->data);
} else {
plane_requested_config->config.csc = NVKMS_IDENTITY_CSC_MATRIX;
}
plane_requested_config->config.cscUseMain = NV_FALSE;
plane_requested_config->flags.cscChanged = NV_TRUE;
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
if (__is_async_flip_requested(plane, crtc_state)) {
/*
* Async flip requests that the flip happen 'as soon as
@ -668,6 +738,38 @@ static int nv_drm_plane_atomic_get_property(
return -EINVAL;
}
/**
* nv_drm_plane_atomic_reset - plane state reset hook
* @plane: DRM plane
*
* Allocate an empty DRM plane state.
*/
static void nv_drm_plane_atomic_reset(struct drm_plane *plane)
{
struct nv_drm_plane_state *nv_plane_state =
nv_drm_calloc(1, sizeof(*nv_plane_state));
if (!nv_plane_state) {
return;
}
drm_atomic_helper_plane_reset(plane);
/*
* The drm atomic helper function allocates a state object that is the wrong
* size. Copy its contents into the one we allocated above and replace the
* pointer.
*/
if (plane->state) {
nv_plane_state->base = *plane->state;
kfree(plane->state);
plane->state = &nv_plane_state->base;
} else {
kfree(nv_plane_state);
}
}
static struct drm_plane_state *
nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
{
@ -727,7 +829,7 @@ static const struct drm_plane_funcs nv_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
.destroy = nv_drm_plane_destroy,
.reset = drm_atomic_helper_plane_reset,
.reset = nv_drm_plane_atomic_reset,
.atomic_get_property = nv_drm_plane_atomic_get_property,
.atomic_set_property = nv_drm_plane_atomic_set_property,
.atomic_duplicate_state = nv_drm_plane_atomic_duplicate_state,
@ -784,6 +886,52 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
}
}
/*
 * Allocate a zeroed nv_drm_crtc_state and put every per-layer requested
 * config into its default state (identity CSC), so the state is valid
 * before its first use. Returns NULL on allocation failure.
 */
static inline struct nv_drm_crtc_state *nv_drm_crtc_state_alloc(void)
{
    struct nv_drm_crtc_state *state = nv_drm_calloc(1, sizeof(*state));

    if (state != NULL) {
        int layer;

        for (layer = 0;
             layer < ARRAY_SIZE(state->req_config.layerRequestedConfig);
             layer++) {
            plane_config_clear(
                &state->req_config.layerRequestedConfig[layer].config);
        }
    }

    return state;
}
/**
 * nv_drm_atomic_crtc_reset - crtc state reset hook
 * @crtc: DRM crtc
 *
 * Reset the CRTC to an empty state backed by a driver-private
 * nv_drm_crtc_state allocation (with per-layer configs pre-cleared by
 * nv_drm_crtc_state_alloc).
 */
static void nv_drm_atomic_crtc_reset(struct drm_crtc *crtc)
{
    struct nv_drm_crtc_state *nv_state = nv_drm_crtc_state_alloc();

    if (!nv_state) {
        return;
    }

    drm_atomic_helper_crtc_reset(crtc);

    /*
     * The drm atomic helper function allocates a state object that is the wrong
     * size. Copy its contents into the one we allocated above and replace the
     * pointer.
     */
    if (crtc->state) {
        nv_state->base = *crtc->state;
        kfree(crtc->state);
        crtc->state = &nv_state->base;
    } else {
        /* Helper failed to allocate; discard our allocation. */
        kfree(nv_state);
    }
}
/**
* nv_drm_atomic_crtc_duplicate_state - crtc state duplicate hook
* @crtc: DRM crtc
@ -795,7 +943,7 @@ static inline void nv_drm_crtc_duplicate_req_head_modeset_config(
static struct drm_crtc_state*
nv_drm_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
{
struct nv_drm_crtc_state *nv_state = nv_drm_calloc(1, sizeof(*nv_state));
struct nv_drm_crtc_state *nv_state = nv_drm_crtc_state_alloc();
if (nv_state == NULL) {
return NULL;
@ -851,7 +999,7 @@ static void nv_drm_atomic_crtc_destroy_state(struct drm_crtc *crtc,
static struct drm_crtc_funcs nv_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
.reset = drm_atomic_helper_crtc_reset,
.reset = nv_drm_atomic_crtc_reset,
.destroy = nv_drm_crtc_destroy,
.atomic_duplicate_state = nv_drm_atomic_crtc_duplicate_state,
.atomic_destroy_state = nv_drm_atomic_crtc_destroy_state,
@ -914,44 +1062,7 @@ static int color_mgmt_config_copy_lut(struct NvKmsLutRamps *nvkms_lut,
return 0;
}
/*
 * Convert a DRM color transform matrix (CTM, 3x3, S31.32 sign-magnitude
 * fixed point) into an NvKms CSC matrix (3x4, two's-complement S15.16
 * fixed point). The fourth (offset) column of the CSC is zeroed.
 */
static void color_mgmt_config_ctm_to_csc(struct NvKmsCscMatrix *nvkms_csc,
                                         struct drm_color_ctm *drm_ctm)
{
    int y;

    /* CTM is a 3x3 matrix while ours is 3x4. Zero out the last column. */
    nvkms_csc->m[0][3] = nvkms_csc->m[1][3] = nvkms_csc->m[2][3] = 0;

    for (y = 0; y < 3; y++) {
        int x;

        for (x = 0; x < 3; x++) {
            /*
             * Values in the CTM are encoded in S31.32 sign-magnitude fixed-
             * point format, while NvKms CSC values are signed 2's-complement
             * S15.16 (Ssign-extend12-3.16?) fixed-point format.
             */
            NvU64 ctmVal = drm_ctm->matrix[y*3 + x];
            NvU64 signBit = ctmVal & (1ULL << 63);
            NvU64 magnitude = ctmVal & ~signBit;

            /*
             * Drop the low 16 bits of the fractional part and the high 17 bits
             * of the integral part. Drop 17 bits to avoid corner cases where
             * the highest resulting bit is a 1, causing the `cscVal = -cscVal`
             * line to result in a positive number.
             */
            NvS32 cscVal = (magnitude >> 16) & ((1ULL << 31) - 1);
            if (signBit) {
                cscVal = -cscVal;
            }

            nvkms_csc->m[y][x] = cscVal;
        }
    }
}
static int color_mgmt_config_set(struct nv_drm_crtc_state *nv_crtc_state,
static int color_mgmt_config_set_luts(struct nv_drm_crtc_state *nv_crtc_state,
struct NvKmsKapiHeadRequestedConfig *req_config)
{
struct NvKmsKapiHeadModeSetConfig *modeset_config =
@ -959,25 +1070,17 @@ static int color_mgmt_config_set(struct nv_drm_crtc_state *nv_crtc_state,
struct drm_crtc_state *crtc_state = &nv_crtc_state->base;
int ret = 0;
struct drm_color_lut *degamma_lut = NULL;
struct drm_color_ctm *ctm = NULL;
struct drm_color_lut *gamma_lut = NULL;
uint64_t degamma_len = 0;
uint64_t gamma_len = 0;
int i;
struct drm_plane *plane;
struct drm_plane_state *plane_state;
/*
* According to the comment in the Linux kernel's
* drivers/gpu/drm/drm_color_mgmt.c, if any of these properties are NULL,
* that LUT or CTM needs to be changed to a linear LUT or identity matrix
* respectively.
* drivers/gpu/drm/drm_color_mgmt.c, if either property is NULL, that LUT
* needs to be changed to a linear LUT
*/
req_config->flags.lutChanged = NV_TRUE;
if (crtc_state->degamma_lut) {
struct drm_color_lut *degamma_lut = NULL;
uint64_t degamma_len = 0;
nv_crtc_state->ilut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->ilut_ramps));
if (!nv_crtc_state->ilut_ramps) {
ret = -ENOMEM;
@ -1007,34 +1110,13 @@ static int color_mgmt_config_set(struct nv_drm_crtc_state *nv_crtc_state,
modeset_config->lut.input.start = 0;
modeset_config->lut.input.end = 0;
modeset_config->lut.input.pRamps = NULL;
}
nv_drm_for_each_new_plane_in_state(crtc_state->state, plane,
plane_state, i) {
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
uint32_t layer = nv_plane->layer_idx;
struct NvKmsKapiLayerRequestedConfig *layer_config;
if (layer == NVKMS_KAPI_LAYER_INVALID_IDX || plane_state->crtc != crtc_state->crtc) {
continue;
}
layer_config = &req_config->layerRequestedConfig[layer];
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX && crtc_state->ctm) {
ctm = (struct drm_color_ctm *)crtc_state->ctm->data;
color_mgmt_config_ctm_to_csc(&layer_config->config.csc, ctm);
layer_config->config.cscUseMain = NV_FALSE;
} else {
/* When crtc_state->ctm is unset, this also sets the main layer to
* the identity matrix.
*/
layer_config->config.csc = NVKMS_IDENTITY_CSC_MATRIX;
}
layer_config->flags.cscChanged = NV_TRUE;
}
if (crtc_state->gamma_lut) {
struct drm_color_lut *gamma_lut = NULL;
uint64_t gamma_len = 0;
nv_crtc_state->olut_ramps = nv_drm_calloc(1, sizeof(*nv_crtc_state->olut_ramps));
if (!nv_crtc_state->olut_ramps) {
ret = -ENOMEM;
@ -1158,7 +1240,7 @@ static int nv_drm_crtc_atomic_check(struct drm_crtc *crtc,
crtc_state->color_mgmt_changed = NV_TRUE;
}
if (crtc_state->color_mgmt_changed) {
if ((ret = color_mgmt_config_set(nv_crtc_state, req_config)) != 0) {
if ((ret = color_mgmt_config_set_luts(nv_crtc_state, req_config)) != 0) {
return ret;
}
}
@ -1428,7 +1510,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
goto failed;
}
nv_state = nv_drm_calloc(1, sizeof(*nv_state));
nv_state = nv_drm_crtc_state_alloc();
if (nv_state == NULL) {
goto failed_state_alloc;
}

View File

@ -74,6 +74,7 @@
#endif
#include <linux/pci.h>
#include <linux/workqueue.h>
/*
* Commit fcd70cd36b9b ("drm: Split out drm_probe_helper.h")
@ -405,6 +406,27 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
return 0;
}
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
/*
* We can't just call drm_kms_helper_hotplug_event directly because
* fbdev_generic may attempt to set a mode from inside the hotplug event
* handler. Because kapi event handling runs on nvkms_kthread_q, this blocks
* other event processing including the flip completion notifier expected by
* nv_drm_atomic_commit.
*
* Defer hotplug event handling to a work item so that nvkms_kthread_q can
* continue processing events while a DRM modeset is in progress.
*/
/*
 * Delayed-work handler that forwards a hotplug event to the DRM core.
 * Runs from the nv_drm_device's hotplug_event_work item.
 */
static void nv_drm_handle_hotplug_event(struct work_struct *work)
{
    /* Recover the owning nv_drm_device from the embedded delayed work. */
    struct delayed_work *dwork = to_delayed_work(work);
    struct nv_drm_device *nv_dev =
        container_of(dwork, struct nv_drm_device, hotplug_event_work);

    drm_kms_helper_hotplug_event(nv_dev->dev);
}
#endif
static int nv_drm_load(struct drm_device *dev, unsigned long flags)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
@ -540,6 +562,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
/* Enable event handling */
INIT_DELAYED_WORK(&nv_dev->hotplug_event_work, nv_drm_handle_hotplug_event);
atomic_set(&nv_dev->enable_event_handling, true);
init_waitqueue_head(&nv_dev->flip_event_wq);
@ -567,6 +590,7 @@ static void __nv_drm_unload(struct drm_device *dev)
return;
}
cancel_delayed_work_sync(&nv_dev->hotplug_event_work);
mutex_lock(&nv_dev->lock);
WARN_ON(nv_dev->subOwnershipGranted);
@ -1523,9 +1547,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif
/*
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
* v4.10 commit fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions"
* (Linux v4.4 commit ea487835e887 "drm: Enforce unlocked ioctl operation
* for kms driver ioctls" previously did it only for drivers that set the
* DRM_MODESET flag), so this will race with SET_CLIENT_CAP. Linux v4.11
* commit dcf727ab5d17 "drm: setclientcap doesn't need the drm BKL" also
* removed locking from SET_CLIENT_CAP so there is no use attempting to lock
* manually. The latter commit acknowledges that this can expose userspace
* to inconsistent behavior when racing with itself, but accepts that risk.
*/
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CLIENT_CAPABILITY,
nv_drm_get_client_capability_ioctl,
0),
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
nv_drm_get_crtc_crc32_ioctl,
@ -1647,7 +1683,7 @@ static struct drm_driver nv_drm_driver = {
* kernel supports atomic modeset and the 'modeset' kernel module
* parameter is true.
*/
static void nv_drm_update_drm_driver_features(void)
void nv_drm_update_drm_driver_features(void)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
@ -1673,7 +1709,7 @@ static void nv_drm_update_drm_driver_features(void)
/*
* Helper function for allocate/register DRM device for given NVIDIA GPU ID.
*/
static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
{
struct nv_drm_device *nv_dev = NULL;
struct drm_device *dev = NULL;
@ -1711,8 +1747,15 @@ static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
dev->dev_private = nv_dev;
nv_dev->dev = dev;
bool bus_is_pci =
#if defined(NV_LINUX)
device->bus == &pci_bus_type;
#elif defined(NV_BSD)
devclass_find("pci");
#endif
#if defined(NV_DRM_DEVICE_HAS_PDEV)
if (device->bus == &pci_bus_type) {
if (bus_is_pci) {
dev->pdev = to_pci_dev(device);
}
#endif
@ -1733,7 +1776,7 @@ static void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
goto failed_grab_ownership;
}
if (device->bus == &pci_bus_type) {
if (bus_is_pci) {
struct pci_dev *pdev = to_pci_dev(device);
#if defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_HAS_DRIVER_ARG)
@ -1773,6 +1816,7 @@ failed_drm_alloc:
/*
* Enumerate NVIDIA GPUs and allocate/register DRM device for each of them.
*/
#if defined(NV_LINUX)
int nv_drm_probe_devices(void)
{
nv_gpu_info_t *gpu_info = NULL;
@ -1815,6 +1859,7 @@ done:
return ret;
}
#endif
/*
* Unregister all NVIDIA DRM devices.
@ -1840,4 +1885,51 @@ void nv_drm_remove_devices(void)
}
}
/*
* Handle system suspend and resume.
*
* Normally, a DRM driver would use drm_mode_config_helper_suspend() to save the
* current state on suspend and drm_mode_config_helper_resume() to restore it
* after resume. This works for upstream drivers because user-mode tasks are
* frozen before the suspend hook is called.
*
* In the case of nvidia-drm, the suspend hook is also called when 'suspend' is
* written to /proc/driver/nvidia/suspend, before user-mode tasks are frozen.
* However, we don't actually need to save and restore the display state because
* the driver requires a VT switch to an unused VT before suspending and a
* switch back to the application (or fbdev console) on resume. The DRM client
* (or fbdev helper functions) will restore the appropriate mode on resume.
*
*/
/*
 * nv_drm_suspend_resume - transition all registered DRM devices across a
 * system suspend (suspend == NV_TRUE) or resume (suspend == NV_FALSE).
 *
 * Fix: 'nv_dev' was initialized to dev_list at declaration and then
 * immediately re-initialized by the for loop — a dead store; declare it
 * uninitialized and let the loop set it.
 */
void nv_drm_suspend_resume(NvBool suspend)
{
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
    struct nv_drm_device *nv_dev;

    /*
     * NVKMS shuts down all heads on suspend. Update DRM state accordingly.
     */
    for (nv_dev = dev_list; nv_dev; nv_dev = nv_dev->next) {
        struct drm_device *dev = nv_dev->dev;

        /* Only modeset-capable devices carry display state to update. */
        if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
            continue;
        }

        if (suspend) {
            /* Stop connector polling before the device goes down. */
            drm_kms_helper_poll_disable(dev);
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
            drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
#endif
            drm_mode_config_reset(dev);
        } else {
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
            drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
#endif
            drm_kms_helper_poll_enable(dev);
        }
    }
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
}
#endif /* NV_DRM_AVAILABLE */

View File

@ -31,6 +31,12 @@ int nv_drm_probe_devices(void);
void nv_drm_remove_devices(void);
void nv_drm_suspend_resume(NvBool suspend);
void nv_drm_register_drm_device(const nv_gpu_info_t *);
void nv_drm_update_drm_driver_features(void);
#endif /* defined(NV_DRM_AVAILABLE) */
#endif /* __NVIDIA_DRM_DRV_H__ */

View File

@ -300,7 +300,7 @@ void nv_drm_handle_display_change(struct nv_drm_device *nv_dev,
nv_drm_connector_mark_connection_status_dirty(nv_encoder->nv_connector);
drm_kms_helper_hotplug_event(dev);
schedule_delayed_work(&nv_dev->hotplug_event_work, 0);
}
void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
@ -347,6 +347,6 @@ void nv_drm_handle_dynamic_display_connected(struct nv_drm_device *nv_dev,
drm_reinit_primary_mode_group(dev);
#endif
drm_kms_helper_hotplug_event(dev);
schedule_delayed_work(&nv_dev->hotplug_event_work, 0);
}
#endif

View File

@ -240,7 +240,7 @@ struct drm_framebuffer *nv_drm_internal_framebuffer_create(
if (nv_dev->modifiers[i] == DRM_FORMAT_MOD_INVALID) {
NV_DRM_DEV_DEBUG_DRIVER(
nv_dev,
"Invalid format modifier for framebuffer object: 0x%016llx",
"Invalid format modifier for framebuffer object: 0x%016" NvU64_fmtx,
modifier);
return ERR_PTR(-EINVAL);
}

View File

@ -1638,7 +1638,7 @@ int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
if (p->pre_wait_value >= p->post_wait_value) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Non-monotonic wait values specified to fence wait: 0x%llu, 0x%llu",
"Non-monotonic wait values specified to fence wait: 0x%" NvU64_fmtu ", 0x%" NvU64_fmtu,
p->pre_wait_value, p->post_wait_value);
goto done;
}

View File

@ -71,12 +71,42 @@ static int __nv_drm_gem_dma_buf_create_mmap_offset(
static int __nv_drm_gem_dma_buf_mmap(struct nv_drm_gem_object *nv_gem,
struct vm_area_struct *vma)
{
#if defined(NV_LINUX)
struct dma_buf_attachment *attach = nv_gem->base.import_attach;
struct dma_buf *dma_buf = attach->dmabuf;
#endif
struct file *old_file;
int ret;
/* check if buffer supports mmap */
#if defined(NV_BSD)
/*
* Most of the FreeBSD DRM code refers to struct file*, which is actually
* a struct linux_file*. The dmabuf code in FreeBSD is not actually plumbed
* through the same linuxkpi bits it seems (probably so it can be used
* elsewhere), so dma_buf->file really is a native FreeBSD struct file...
*/
if (!nv_gem->base.filp->f_op->mmap)
return -EINVAL;
/* readjust the vma */
get_file(nv_gem->base.filp);
old_file = vma->vm_file;
vma->vm_file = nv_gem->base.filp;
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);
ret = nv_gem->base.filp->f_op->mmap(nv_gem->base.filp, vma);
if (ret) {
/* restore old parameters on failure */
vma->vm_file = old_file;
vma->vm_pgoff += drm_vma_node_start(&nv_gem->base.vma_node);
fput(nv_gem->base.filp);
} else {
if (old_file)
fput(old_file);
}
#else
if (!dma_buf->file->f_op->mmap)
return -EINVAL;
@ -84,18 +114,20 @@ static int __nv_drm_gem_dma_buf_mmap(struct nv_drm_gem_object *nv_gem,
get_file(dma_buf->file);
old_file = vma->vm_file;
vma->vm_file = dma_buf->file;
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);;
vma->vm_pgoff -= drm_vma_node_start(&nv_gem->base.vma_node);
ret = dma_buf->file->f_op->mmap(dma_buf->file, vma);
if (ret) {
/* restore old parameters on failure */
vma->vm_file = old_file;
vma->vm_pgoff += drm_vma_node_start(&nv_gem->base.vma_node);
fput(dma_buf->file);
} else {
if (old_file)
fput(old_file);
}
#endif
return ret;
}

View File

@ -37,6 +37,9 @@
#endif
#include <linux/io.h>
#if defined(NV_BSD)
#include <vm/vm_pageout.h>
#endif
#include "nv-mm.h"
@ -93,7 +96,17 @@ static vm_fault_t __nv_drm_gem_nvkms_handle_vma_fault(
if (nv_nvkms_memory->pages_count == 0) {
pfn = (unsigned long)(uintptr_t)nv_nvkms_memory->pPhysicalAddress;
pfn >>= PAGE_SHIFT;
#if defined(NV_LINUX)
/*
* FreeBSD doesn't set pgoff. We instead have pfn be the base physical
* address, and we will calculate the index pidx from the virtual address.
*
* This only works because linux_cdev_pager_populate passes the pidx as
* vmf->virtual_address. Then we turn the virtual address
* into a physical page number.
*/
pfn += page_offset;
#endif
} else {
BUG_ON(page_offset >= nv_nvkms_memory->pages_count);
pfn = page_to_pfn(nv_nvkms_memory->pages[page_offset]);
@ -243,6 +256,15 @@ static int __nv_drm_nvkms_gem_obj_init(
NvU64 *pages = NULL;
NvU32 numPages = 0;
if ((size % PAGE_SIZE) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"NvKmsKapiMemory 0x%p size should be in a multiple of page size to "
"create a gem object",
pMemory);
return -EINVAL;
}
nv_nvkms_memory->pPhysicalAddress = NULL;
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;
@ -314,7 +336,7 @@ int nv_drm_dumb_create(
ret = -ENOMEM;
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to allocate NvKmsKapiMemory for dumb object of size %llu",
"Failed to allocate NvKmsKapiMemory for dumb object of size %" NvU64_fmtu,
args->size);
goto nvkms_alloc_memory_failed;
}

View File

@ -36,6 +36,10 @@
#include "linux/mm.h"
#include "nv-mm.h"
#if defined(NV_BSD)
#include <vm/vm_pageout.h>
#endif
static inline
void __nv_drm_gem_user_memory_free(struct nv_drm_gem_object *nv_gem)
{
@ -113,6 +117,10 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
BUG_ON(page_offset >= nv_user_memory->pages_count);
#if !defined(NV_LINUX)
ret = vmf_insert_pfn(vma, address, page_to_pfn(nv_user_memory->pages[page_offset]));
#else /* !defined(NV_LINUX) */
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
switch (ret) {
case 0:
@ -131,6 +139,7 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
ret = VM_FAULT_SIGBUS;
break;
}
#endif /* !defined(NV_LINUX) */
return ret;
}
@ -170,7 +179,7 @@ int nv_drm_gem_import_userspace_memory_ioctl(struct drm_device *dev,
if ((params->size % PAGE_SIZE) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Userspace memory 0x%llx size should be in a multiple of page "
"Userspace memory 0x%" NvU64_fmtx " size should be in a multiple of page "
"size to create a gem object",
params->address);
return -EINVAL;
@ -183,7 +192,7 @@ int nv_drm_gem_import_userspace_memory_ioctl(struct drm_device *dev,
if (ret != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"Failed to lock user pages for address 0x%llx: %d",
"Failed to lock user pages for address 0x%" NvU64_fmtx ": %d",
params->address, ret);
return ret;
}

View File

@ -612,6 +612,19 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/*
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
* ioctls" previously did it only for drivers that set the DRM_MODESET flag), so
* it was effectively a no-op anyway.
*/
#if !defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
#define DRM_UNLOCKED 0
#endif
/*
* drm_vma_offset_exact_lookup_locked() were added
* by kernel commit 2225cfe46bcc which was Signed-off-by:

View File

@ -71,7 +71,7 @@
*
* 'warning: suggest parentheses around arithmetic in operand of |'
*/
#if defined(NV_LINUX)
#if defined(NV_LINUX) || defined(NV_BSD)
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -21,8 +21,6 @@
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
#include "nvidia-drm-os-interface.h"
#include "nvidia-drm.h"
@ -31,261 +29,18 @@
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
#include <linux/file.h>
#include <linux/sync_file.h>
#endif
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include "nv-mm.h"
MODULE_PARM_DESC(
modeset,
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
bool nv_drm_modeset_module_param = false;
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
MODULE_PARM_DESC(
fbdev,
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
bool nv_drm_fbdev_module_param = false;
module_param_named(fbdev, nv_drm_fbdev_module_param, bool, 0400);
#endif
/*
 * Allocate a zero-initialized array of 'nmemb' elements of 'size' bytes
 * each, guarding against multiplication overflow.
 *
 * Returns NULL on overflow or allocation failure.
 * The caller releases the memory with nv_drm_free().
 */
void *nv_drm_calloc(size_t nmemb, size_t size)
{
    size_t total_size = nmemb * size;

    //
    // Check for overflow: if the multiplication wrapped, dividing the
    // product by nmemb does not give back 'size'.
    //
    if ((nmemb != 0) && ((total_size / nmemb) != size))
    {
        return NULL;
    }

    /* Use the overflow-checked product instead of recomputing nmemb * size. */
    return kzalloc(total_size, GFP_KERNEL);
}
/*
 * Free memory obtained from nv_drm_calloc() / nv_drm_asprintf().
 *
 * Pointers carrying an IS_ERR()-encoded error code are ignored rather
 * than handed to kfree(); kfree(NULL) is already a no-op.
 */
void nv_drm_free(void *ptr)
{
if (IS_ERR(ptr)) {
return;
}
kfree(ptr);
}
/*
 * printf-style formatted string allocation from the kernel heap.
 *
 * Returns a newly allocated string, or NULL on allocation failure.
 * The caller releases it with nv_drm_free().
 */
char *nv_drm_asprintf(const char *fmt, ...)
{
    va_list args;
    char *result;

    va_start(args, fmt);
    result = kvasprintf(GFP_KERNEL, fmt, args);
    va_end(args);

    return result;
}
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
#elif defined(NVCPU_PPC64LE)
#define WRITE_COMBINE_FLUSH() asm volatile("sync":::"memory")
#else
#define WRITE_COMBINE_FLUSH() mb()
#endif
/*
 * Flush outstanding write-combined stores so they become visible to the
 * device (sfence on x86, sync on PPC64LE, generic mb() elsewhere — see
 * the WRITE_COMBINE_FLUSH() definition above).
 */
void nv_drm_write_combine_flush(void)
{
WRITE_COMBINE_FLUSH();
}
/*
 * Pin 'pages_count' user pages starting at user virtual address 'address'
 * for writing, returning the newly allocated page array in *pages.
 *
 * Returns 0 on success; -ENOMEM if the page array cannot be allocated;
 * the pin error code, or -EINVAL on a partial pin. On success the caller
 * must release the pages with nv_drm_unlock_user_pages().
 */
int nv_drm_lock_user_pages(unsigned long address,
unsigned long pages_count, struct page ***pages)
{
struct mm_struct *mm = current->mm;
struct page **user_pages;
int pages_pinned;
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
if (user_pages == NULL) {
return -ENOMEM;
}
/* Pin the pages while holding the mmap read lock. */
nv_mmap_read_lock(mm);
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
user_pages, NULL);
nv_mmap_read_unlock(mm);
/* A short (partial) pin is treated as a failure, too. */
if (pages_pinned < 0 || (unsigned)pages_pinned < pages_count) {
goto failed;
}
*pages = user_pages;
return 0;
failed:
/* Undo any partial pinning before freeing the array. */
if (pages_pinned > 0) {
int i;
for (i = 0; i < pages_pinned; i++) {
NV_UNPIN_USER_PAGE(user_pages[i]);
}
}
nv_drm_free(user_pages);
return (pages_pinned < 0) ? pages_pinned : -EINVAL;
}
/*
 * Release pages pinned by nv_drm_lock_user_pages(): mark each page dirty
 * (they were pinned FOLL_WRITE), drop the pin, then free the array.
 */
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
{
unsigned long i;
for (i = 0; i < pages_count; i++) {
set_page_dirty_lock(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
nv_drm_free(pages);
}
/*
 * Map an array of pages into a contiguous kernel virtual range.
 * Returns NULL on failure; unmap with nv_drm_vunmap().
 */
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
{
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
}
/* Tear down a mapping created by nv_drm_vmap(). */
void nv_drm_vunmap(void *address)
{
vunmap(address);
}
/*
 * Initialize a worker backed by an nv_kthread_q. Returns true on success,
 * false if the underlying queue/thread could not be created.
 */
bool nv_drm_workthread_init(nv_drm_workthread *worker, const char *name)
{
worker->shutting_down = false;
if (nv_kthread_q_init(&worker->q, name)) {
return false;
}
spin_lock_init(&worker->lock);
return true;
}
/*
 * Stop the worker: set shutting_down under the lock (so no new work can
 * race in via nv_drm_workthread_add_work), then stop the queue.
 */
void nv_drm_workthread_shutdown(nv_drm_workthread *worker)
{
unsigned long flags;
spin_lock_irqsave(&worker->lock, flags);
worker->shutting_down = true;
spin_unlock_irqrestore(&worker->lock, flags);
nv_kthread_q_stop(&worker->q);
}
/* Bind a callback and its argument to a work item (does not schedule it). */
void nv_drm_workthread_work_init(nv_drm_work *work,
void (*callback)(void *),
void *arg)
{
nv_kthread_q_item_init(work, callback, arg);
}
/*
 * Queue a work item unless the worker is shutting down.
 * Returns the queue's scheduling result, or 0 if the item was not queued.
 */
int nv_drm_workthread_add_work(nv_drm_workthread *worker, nv_drm_work *work)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&worker->lock, flags);
if (!worker->shutting_down) {
ret = nv_kthread_q_schedule_q_item(&worker->q, work);
}
spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
/* Initialize a timer with the given expiry callback (does not arm it). */
void nv_drm_timer_setup(nv_drm_timer *timer, void (*callback)(nv_drm_timer *nv_drm_timer))
{
nv_timer_setup(timer, callback);
}
/* (Re-)arm the timer to fire at the given absolute jiffies value. */
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long timeout_native)
{
mod_timer(&timer->kernel_timer, timeout_native);
}
/* Current time in the timer's native units (jiffies). */
unsigned long nv_drm_timer_now(void)
{
return jiffies;
}
/* Convert a relative timeout in milliseconds to an absolute jiffies value. */
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
{
return jiffies + msecs_to_jiffies(relative_timeout_ms);
}
/*
 * Deactivate the timer and wait for a concurrently running callback to
 * finish. Returns true if the timer was pending (actually deactivated),
 * false if it was already inactive.
 */
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
    /* del_timer_sync() returns 0/1; map it to bool directly instead of
     * the redundant if/else returning true/false. */
    return del_timer_sync(&timer->kernel_timer) != 0;
}
#if defined(NV_DRM_FENCE_AVAILABLE)
/*
 * Wrap a DMA fence in a new sync_file and return an O_CLOEXEC fd for it.
 * Returns a negative errno on failure, or -EINVAL when the kernel has no
 * sync_file support.
 */
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
struct sync_file *sync;
int fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
return fd;
}
/* sync_file_create() generates its own reference to the fence */
sync = sync_file_create(fence);
if (IS_ERR(sync)) {
put_unused_fd(fd);
return PTR_ERR(sync);
}
fd_install(fd, sync->file);
return fd;
#else /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
return -EINVAL;
#endif /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
}
/*
 * Resolve a sync_file fd to its fence (the caller owns the returned
 * reference), or NULL when sync_file_get_fence() is unavailable.
 */
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd)
{
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
return sync_file_get_fence(fd);
#else /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
return NULL;
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
/* Yield the CPU: interruptible sleep for one scheduler tick (1 jiffy). */
void nv_drm_yield(void)
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
#endif /* NV_DRM_AVAILABLE */
/*************************************************************************

View File

@ -321,6 +321,24 @@ int nv_drm_atomic_check(struct drm_device *dev,
{
int ret = 0;
#if defined(NV_DRM_COLOR_MGMT_AVAILABLE)
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
int i;
nv_drm_for_each_crtc_in_state(state, crtc, crtc_state, i) {
/*
* if the color management changed on the crtc, we need to update the
* crtc's plane's CSC matrices, so add the crtc's planes to the commit
*/
if (crtc_state->color_mgmt_changed) {
if ((ret = drm_atomic_add_affected_planes(state, crtc)) != 0) {
goto done;
}
}
}
#endif /* NV_DRM_COLOR_MGMT_AVAILABLE */
if ((ret = drm_atomic_helper_check(dev, state)) != 0) {
goto done;
}

View File

@ -0,0 +1,285 @@
/*
* Copyright (c) 2015-2023, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <linux/slab.h>
#include "nvidia-drm-os-interface.h"
#if defined(NV_DRM_AVAILABLE)
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
#include <linux/file.h>
#include <linux/sync_file.h>
#endif
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/device.h>
#include "nv-mm.h"
#if defined(NV_DRM_DRMP_H_PRESENT)
#include <drm/drmP.h>
#endif
bool nv_drm_modeset_module_param = false;
bool nv_drm_fbdev_module_param = false;
/*
 * Allocate a zero-initialized array of 'nmemb' elements of 'size' bytes
 * each, guarding against multiplication overflow.
 *
 * Returns NULL on overflow or allocation failure.
 * The caller releases the memory with nv_drm_free().
 */
void *nv_drm_calloc(size_t nmemb, size_t size)
{
    size_t total_size = nmemb * size;

    //
    // Check for overflow: if the multiplication wrapped, dividing the
    // product by nmemb does not give back 'size'.
    //
    if ((nmemb != 0) && ((total_size / nmemb) != size))
    {
        return NULL;
    }

    /* Use the overflow-checked product instead of recomputing nmemb * size. */
    return kzalloc(total_size, GFP_KERNEL);
}
/*
 * Release memory obtained from nv_drm_calloc() / nv_drm_asprintf().
 * ERR_PTR-encoded values are not real allocations, so they are ignored;
 * kfree(NULL) is already a no-op.
 */
void nv_drm_free(void *ptr)
{
    if (!IS_ERR(ptr)) {
        kfree(ptr);
    }
}
/*
 * printf-style formatted string allocation from the kernel heap.
 * Returns NULL on allocation failure; free with nv_drm_free().
 */
char *nv_drm_asprintf(const char *fmt, ...)
{
va_list ap;
char *p;
va_start(ap, fmt);
p = kvasprintf(GFP_KERNEL, fmt, ap);
va_end(ap);
return p;
}
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#define WRITE_COMBINE_FLUSH() asm volatile("sfence":::"memory")
#elif defined(NVCPU_PPC64LE)
#define WRITE_COMBINE_FLUSH() asm volatile("sync":::"memory")
#else
#define WRITE_COMBINE_FLUSH() mb()
#endif
/*
 * Flush outstanding write-combined stores so they become visible to the
 * device (sfence on x86, sync on PPC64LE, generic mb() elsewhere — see
 * the WRITE_COMBINE_FLUSH() definition above).
 */
void nv_drm_write_combine_flush(void)
{
WRITE_COMBINE_FLUSH();
}
/*
 * Pin 'pages_count' user pages starting at user virtual address 'address'
 * for writing, returning the newly allocated page array in *pages.
 *
 * Returns 0 on success; -ENOMEM if the page array cannot be allocated;
 * the pin error code, or -EINVAL on a partial pin. On success the caller
 * must release the pages with nv_drm_unlock_user_pages().
 */
int nv_drm_lock_user_pages(unsigned long address,
unsigned long pages_count, struct page ***pages)
{
struct mm_struct *mm = current->mm;
struct page **user_pages;
int pages_pinned;
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
if (user_pages == NULL) {
return -ENOMEM;
}
/* Pin the pages while holding the mmap read lock. */
nv_mmap_read_lock(mm);
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
user_pages);
nv_mmap_read_unlock(mm);
/* A short (partial) pin is treated as a failure, too. */
if (pages_pinned < 0 || (unsigned)pages_pinned < pages_count) {
goto failed;
}
*pages = user_pages;
return 0;
failed:
/* Undo any partial pinning before freeing the array. */
if (pages_pinned > 0) {
int i;
for (i = 0; i < pages_pinned; i++) {
NV_UNPIN_USER_PAGE(user_pages[i]);
}
}
nv_drm_free(user_pages);
return (pages_pinned < 0) ? pages_pinned : -EINVAL;
}
/*
 * Release pages pinned by nv_drm_lock_user_pages() and free the array.
 */
void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
{
    unsigned long idx;

    for (idx = 0; idx < pages_count; idx++) {
        /* The pages were pinned FOLL_WRITE: mark dirty before unpinning. */
        set_page_dirty_lock(pages[idx]);
        NV_UNPIN_USER_PAGE(pages[idx]);
    }

    nv_drm_free(pages);
}
/*
 * FreeBSD's linuxkpi vmap() ignores the flags argument, so VM_USERMAP is
 * not defined there. Define it to 0 on FreeBSD so the shared call below
 * compiles unchanged while the flag has no effect.
 *
 * vmap: sys/compat/linuxkpi/common/src/linux_compat.c
 */
#if defined(NV_BSD)
#define VM_USERMAP 0
#endif
/*
 * Map an array of pages into a contiguous kernel virtual range.
 * Returns NULL on failure; unmap with nv_drm_vunmap().
 */
void *nv_drm_vmap(struct page **pages, unsigned long pages_count)
{
return vmap(pages, pages_count, VM_USERMAP, PAGE_KERNEL);
}
/* Tear down a mapping created by nv_drm_vmap(). */
void nv_drm_vunmap(void *address)
{
vunmap(address);
}
/*
 * Initialize a worker backed by an nv_kthread_q. Returns true on success,
 * false if the underlying queue/thread could not be created.
 */
bool nv_drm_workthread_init(nv_drm_workthread *worker, const char *name)
{
worker->shutting_down = false;
if (nv_kthread_q_init(&worker->q, name)) {
return false;
}
spin_lock_init(&worker->lock);
return true;
}
/*
 * Stop the worker: set shutting_down under the lock (so no new work can
 * race in via nv_drm_workthread_add_work), then stop the queue.
 */
void nv_drm_workthread_shutdown(nv_drm_workthread *worker)
{
unsigned long flags;
spin_lock_irqsave(&worker->lock, flags);
worker->shutting_down = true;
spin_unlock_irqrestore(&worker->lock, flags);
nv_kthread_q_stop(&worker->q);
}
/* Bind a callback and its argument to a work item (does not schedule it). */
void nv_drm_workthread_work_init(nv_drm_work *work,
void (*callback)(void *),
void *arg)
{
nv_kthread_q_item_init(work, callback, arg);
}
/*
 * Queue a work item unless the worker is shutting down.
 * Returns the queue's scheduling result, or 0 if the item was not queued.
 */
int nv_drm_workthread_add_work(nv_drm_workthread *worker, nv_drm_work *work)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&worker->lock, flags);
if (!worker->shutting_down) {
ret = nv_kthread_q_schedule_q_item(&worker->q, work);
}
spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
/* Initialize a timer with the given expiry callback (does not arm it). */
void nv_drm_timer_setup(nv_drm_timer *timer, void (*callback)(nv_drm_timer *nv_drm_timer))
{
nv_timer_setup(timer, callback);
}
/* (Re-)arm the timer to fire at the given absolute jiffies value. */
void nv_drm_mod_timer(nv_drm_timer *timer, unsigned long timeout_native)
{
mod_timer(&timer->kernel_timer, timeout_native);
}
/* Current time in the timer's native units (jiffies). */
unsigned long nv_drm_timer_now(void)
{
return jiffies;
}
/* Convert a relative timeout in milliseconds to an absolute jiffies value. */
unsigned long nv_drm_timeout_from_ms(NvU64 relative_timeout_ms)
{
return jiffies + msecs_to_jiffies(relative_timeout_ms);
}
/*
 * Deactivate the timer and wait for a concurrently running callback to
 * finish. Returns true if the timer was pending (actually deactivated),
 * false if it was already inactive.
 */
bool nv_drm_del_timer_sync(nv_drm_timer *timer)
{
    /* del_timer_sync() returns 0/1; map it to bool directly instead of
     * the redundant if/else returning true/false. */
    return del_timer_sync(&timer->kernel_timer) != 0;
}
#if defined(NV_DRM_FENCE_AVAILABLE)
/*
 * Wrap a DMA fence in a new sync_file and return an O_CLOEXEC fd for it.
 * Returns a negative errno on failure, or -EINVAL when the kernel has no
 * sync_file support.
 */
int nv_drm_create_sync_file(nv_dma_fence_t *fence)
{
#if defined(NV_LINUX_SYNC_FILE_H_PRESENT)
struct sync_file *sync;
int fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
return fd;
}
/* sync_file_create() generates its own reference to the fence */
sync = sync_file_create(fence);
if (IS_ERR(sync)) {
put_unused_fd(fd);
return PTR_ERR(sync);
}
fd_install(fd, sync->file);
return fd;
#else /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
return -EINVAL;
#endif /* defined(NV_LINUX_SYNC_FILE_H_PRESENT) */
}
/*
 * Resolve a sync_file fd to its fence (the caller owns the returned
 * reference), or NULL when sync_file_get_fence() is unavailable.
 */
nv_dma_fence_t *nv_drm_sync_file_get_fence(int fd)
{
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
return sync_file_get_fence(fd);
#else /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
return NULL;
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
}
#endif /* defined(NV_DRM_FENCE_AVAILABLE) */
/* Yield the CPU: interruptible sleep for one scheduler tick (1 jiffy). */
void nv_drm_yield(void)
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
#endif /* NV_DRM_AVAILABLE */

View File

@ -33,7 +33,7 @@
#include "nvidia-dma-fence-helper.h"
#endif
#if defined(NV_LINUX)
#if defined(NV_LINUX) || defined(NV_BSD)
#include "nv-kthread-q.h"
#include "linux/spinlock.h"
@ -45,18 +45,18 @@ typedef struct nv_drm_workthread {
typedef nv_kthread_q_item_t nv_drm_work;
#else /* defined(NV_LINUX) */
#else
#error "Need to define deferred work primitives for this OS"
#endif /* else defined(NV_LINUX) */
#endif
#if defined(NV_LINUX)
#if defined(NV_LINUX) || defined(NV_BSD)
#include "nv-timer.h"
typedef struct nv_timer nv_drm_timer;
#else /* defined(NV_LINUX) */
#else
#error "Need to define kernel timer callback primitives for this OS"
#endif /* else defined(NV_LINUX) */
#endif
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
#define NV_DRM_FBDEV_GENERIC_AVAILABLE

View File

@ -126,6 +126,7 @@ struct nv_drm_device {
NvU64 modifiers[6 /* block linear */ + 1 /* linear */ + 1 /* terminator */];
#endif
struct delayed_work hotplug_event_work;
atomic_t enable_event_handling;
/**

View File

@ -0,0 +1,131 @@
###########################################################################
# Kbuild fragment for nvidia-drm.ko
###########################################################################
#
# Define NVIDIA_DRM_SOURCES
#
NVIDIA_DRM_SOURCES =
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-drv.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-utils.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-crtc.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-encoder.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
NVIDIA_DRM_SOURCES += nvidia-drm/nv-kthread-q.c
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-nvkms-memory.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-user-memory.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-dma-buf.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-format.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-os-interface.c
#
# Register the conftests needed by nvidia-drm.ko
#
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_put
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_format_num_planes
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_for_each_possible_encoder
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_rotation_available
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_vma_offset_exact_lookup_locked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += nvhost_dma_fence_unpack
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_irq
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_name
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_device_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_set_busid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_connectors_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_init_function_args
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_helper_mode_fill_fb_struct
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_drop_has_from_release_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_unload_has_int_return_type
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_crtc_destroy_state_has_crtc_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_plane_destroy_state_has_plane_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_object_find_has_file_priv_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_buf_owner
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_list_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_swap_state_has_stall_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_prime_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_offset_node_has_readonly
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_display_mode_has_vrefresh
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_master_set_has_int_return_type
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_free_object
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_prime_pages_to_sg_has_drm_device_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_callbacks
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_vmap_has_map_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present

View File

@ -2,30 +2,16 @@
# Kbuild fragment for nvidia-drm.ko
###########################################################################
# Get our source file list and conftest list from the common file
include $(src)/nvidia-drm/nvidia-drm-sources.mk
# Linux-specific sources
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
#
# Define NVIDIA_DRM_{SOURCES,OBJECTS}
#
NVIDIA_DRM_SOURCES =
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-drv.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-utils.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-crtc.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-encoder.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
NVIDIA_DRM_SOURCES += nvidia-drm/nv-kthread-q.c
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-nvkms-memory.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-user-memory.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem-dma-buf.c
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-format.c
NVIDIA_DRM_OBJECTS = $(patsubst %.c,%.o,$(NVIDIA_DRM_SOURCES))
obj-m += nvidia-drm.o
@ -44,107 +30,4 @@ NVIDIA_DRM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_DRM_OBJECTS), $(NVIDIA_DRM_CFLAGS))
#
# Register the conftests needed by nvidia-drm.ko
#
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_DRM_OBJECTS)
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_atomic_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_inc
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_gpl_refcount_dec_and_test
NV_CONFTEST_GENERIC_COMPILE_TESTS += drm_alpha_blending_available
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_fd_to_handle
NV_CONFTEST_GENERIC_COMPILE_TESTS += is_export_symbol_present_drm_gem_prime_handle_to_fd
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_put
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_format_num_planes
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_for_each_possible_encoder
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_rotation_available
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_vma_offset_exact_lookup_locked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += nvhost_dma_fence_unpack
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += dma_fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_irq
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_get_name
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_device_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_legacy_dev_list
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_set_busid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_connectors_changed
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_init_function_args
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_helper_mode_fill_fb_struct
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_drop_has_from_release_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_unload_has_int_return_type
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_crtc_destroy_state_has_crtc_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_plane_destroy_state_has_plane_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_object_find_has_file_priv_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_buf_owner
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_list_iter
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_atomic_helper_swap_state_has_stall_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_prime_flag_present
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_offset_node_has_readonly
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_display_mode_has_vrefresh
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_master_set_has_int_return_type
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_free_object
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_prime_pages_to_sg_has_drm_device_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_callbacks
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_vmap_has_map_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += fence_ops_use_64bit_seqno
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers_has_driver_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg

View File

@ -45,6 +45,7 @@ int nv_drm_init(void)
return -EINVAL;
}
nvKms->setSuspendResumeCallback(nv_drm_suspend_resume);
return nv_drm_probe_devices();
#else
return 0;
@ -54,6 +55,7 @@ int nv_drm_init(void)
/*
 * Module teardown for the nvidia-drm layer: clears the NVKMS
 * suspend/resume callback (set during nv_drm_init()) and removes all
 * probed DRM devices.  Compiles to a no-op when NV_DRM_AVAILABLE is
 * not defined.
 */
void nv_drm_exit(void)
{
#if defined(NV_DRM_AVAILABLE)
    nvKms->setSuspendResumeCallback(NULL);
    nv_drm_remove_devices();
#endif
}

View File

@ -35,12 +35,13 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/freezer.h>
#include <linux/poll.h>
#include <linux/cdev.h>
#include <acpi/video.h>
#include "nvstatus.h"
#include "nv-register-module.h"
#include "nv-modeset-interface.h"
#include "nv-kref.h"
@ -53,6 +54,7 @@
#include "nv-kthread-q.h"
#include "nv-time.h"
#include "nv-lock.h"
#include "nv-chardev-numbers.h"
#if !defined(CONFIG_RETPOLINE)
#include "nv-retpoline.h"
@ -74,6 +76,12 @@ module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0
static bool hdmi_deepcolor = false;
module_param_named(hdmi_deepcolor, hdmi_deepcolor, bool, 0400);
static bool vblank_sem_control = false;
module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@ -117,6 +125,16 @@ NvBool nvkms_hdmi_deepcolor(void)
return hdmi_deepcolor;
}
/*
 * Report the value of the "vblank_sem_control" module parameter
 * (declared above with mode 0400, i.e. fixed at module load; default
 * false).
 */
NvBool nvkms_vblank_sem_control(void)
{
    return vblank_sem_control;
}
/*
 * Report the value of the "opportunistic_display_sync" module
 * parameter (declared above with mode 0400, i.e. fixed at module load;
 * default true).
 */
NvBool nvkms_opportunistic_display_sync(void)
{
    return opportunistic_display_sync;
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
@ -482,6 +500,7 @@ nvkms_event_queue_changed(nvkms_per_open_handle_t *pOpenKernel,
static void nvkms_suspend(NvU32 gpuId)
{
if (gpuId == 0) {
nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
nvkms_write_lock_pm_lock();
}
@ -498,6 +517,7 @@ static void nvkms_resume(NvU32 gpuId)
if (gpuId == 0) {
nvkms_write_unlock_pm_lock();
nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
}
}
@ -827,49 +847,6 @@ void nvkms_free_timer(nvkms_timer_handle_t *handle)
timer->cancel = NV_TRUE;
}
/*
 * Translate a file descriptor into the NVKMS per-open data pointer.
 *
 * Returns NULL unless fd refers to the nvidia-modeset character device
 * (matched by major/minor number) and the file has per-open state
 * attached (filp->private_data set by open).
 */
void* nvkms_get_per_open_data(int fd)
{
    void *data = NULL;
    struct file *filp = fget(fd);

    if (filp != NULL) {
        if ((filp->f_inode != NULL) &&
            (MAJOR(filp->f_inode->i_rdev) == NVKMS_MAJOR_DEVICE_NUMBER) &&
            (MINOR(filp->f_inode->i_rdev) == NVKMS_MINOR_DEVICE_NUMBER)) {
            struct nvkms_per_open *popen = filp->private_data;

            if (popen != NULL) {
                data = popen->data;
            }
        }

        /*
         * Balance the reference taken by fget() with fput().  Dropping
         * the reference before returning popen->data is safe because
         * core NVKMS currently holds the nvkms_lock, which keeps the
         * nvkms_close() => nvKmsClose() call chain from freeing the
         * file out from under the caller.
         */
        fput(filp);
    }

    return data;
}
NvBool nvkms_fd_is_nvidia_chardev(int fd)
{
struct file *filp = fget(fd);
@ -1621,6 +1598,12 @@ static int nvkms_ioctl(struct inode *inode, struct file *filp,
return status;
}
/*
 * unlocked_ioctl/compat_ioctl entry point: forwards to nvkms_ioctl(),
 * supplying the inode from the struct file.
 */
static long nvkms_unlocked_ioctl(struct file *filp, unsigned int cmd,
                                 unsigned long arg)
{
    struct inode *inode = filp->f_inode;

    return nvkms_ioctl(inode, filp, cmd, arg);
}
static unsigned int nvkms_poll(struct file *filp, poll_table *wait)
{
unsigned int mask = 0;
@ -1648,17 +1631,73 @@ static unsigned int nvkms_poll(struct file *filp, poll_table *wait)
* Module loading support code.
*************************************************************************/
static nvidia_module_t nvidia_modeset_module = {
/*
 * dev_t for the nvidia-modeset character device, built from the fixed
 * NVIDIA major number and the modeset device's minor number.
 */
#define NVKMS_RDEV (MKDEV(NV_MAJOR_DEVICE_NUMBER, \
                          NV_MINOR_DEVICE_NUMBER_MODESET_DEVICE))
static struct file_operations nvkms_fops = {
.owner = THIS_MODULE,
.module_name = "nvidia-modeset",
.instance = 1, /* minor number: 255-1=254 */
.open = nvkms_open,
.close = nvkms_close,
.mmap = nvkms_mmap,
.ioctl = nvkms_ioctl,
.poll = nvkms_poll,
.unlocked_ioctl = nvkms_unlocked_ioctl,
#if NVCPU_IS_X86_64 || NVCPU_IS_AARCH64
.compat_ioctl = nvkms_unlocked_ioctl,
#endif
.mmap = nvkms_mmap,
.open = nvkms_open,
.release = nvkms_close,
};
static struct cdev nvkms_device_cdev;
/*
 * Reserve the nvidia-modeset device number and register the NVKMS
 * character device.
 *
 * Returns 0 on success, or a negative errno from
 * register_chrdev_region()/cdev_add() on failure; on cdev_add()
 * failure the reserved region is released again.
 */
static int __init nvkms_register_chrdev(void)
{
    int ret = register_chrdev_region(NVKMS_RDEV, 1, "nvidia-modeset");

    if (ret < 0) {
        return ret;
    }

    cdev_init(&nvkms_device_cdev, &nvkms_fops);

    ret = cdev_add(&nvkms_device_cdev, NVKMS_RDEV, 1);
    if (ret < 0) {
        /* Undo the region reservation if the cdev could not be added. */
        unregister_chrdev_region(NVKMS_RDEV, 1);
    }

    return ret;
}
/*
 * Tear down the NVKMS character device: delete the cdev, then release
 * the reserved device number.  Reverses nvkms_register_chrdev().
 */
static void nvkms_unregister_chrdev(void)
{
    cdev_del(&nvkms_device_cdev);
    unregister_chrdev_region(NVKMS_RDEV, 1);
}
/*
 * Map a file descriptor to its NVKMS per-open data.
 *
 * Returns NULL when fd is not open, does not refer to the NVKMS device
 * (identified by its file_operations table), or has no per-open state
 * attached yet.
 */
void* nvkms_get_per_open_data(int fd)
{
    struct file *filp = fget(fd);
    struct nvkms_per_open *popen;
    void *data = NULL;

    if (filp == NULL) {
        return NULL;
    }

    popen = (filp->f_op == &nvkms_fops) ? filp->private_data : NULL;
    if (popen != NULL) {
        data = popen->data;
    }

    /*
     * fget() incremented the struct file's reference count, which needs
     * to be balanced with a call to fput().  It is safe to decrement
     * the reference count before returning popen->data because core
     * NVKMS is currently holding the nvkms_lock, which prevents the
     * nvkms_close() => nvKmsClose() call chain from freeing the file
     * out from under the caller.
     */
    fput(filp);

    return data;
}
static int __init nvkms_init(void)
{
int ret;
@ -1689,10 +1728,9 @@ static int __init nvkms_init(void)
INIT_LIST_HEAD(&nvkms_timers.list);
spin_lock_init(&nvkms_timers.lock);
ret = nvidia_register_module(&nvidia_modeset_module);
ret = nvkms_register_chrdev();
if (ret != 0) {
goto fail_register_module;
goto fail_register_chrdev;
}
down(&nvkms_lock);
@ -1711,8 +1749,8 @@ static int __init nvkms_init(void)
return 0;
fail_module_load:
nvidia_unregister_module(&nvidia_modeset_module);
fail_register_module:
nvkms_unregister_chrdev();
fail_register_chrdev:
nv_kthread_q_stop(&nvkms_deferred_close_kthread_q);
fail_deferred_close_kthread:
nv_kthread_q_stop(&nvkms_kthread_q);
@ -1776,7 +1814,7 @@ restart:
nv_kthread_q_stop(&nvkms_deferred_close_kthread_q);
nv_kthread_q_stop(&nvkms_kthread_q);
nvidia_unregister_module(&nvidia_modeset_module);
nvkms_unregister_chrdev();
nvkms_free_rm();
if (malloc_verbose) {

View File

@ -100,6 +100,8 @@ NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_hdmi_frl(void);
NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_hdmi_deepcolor(void);
NvBool nvkms_vblank_sem_control(void);
NvBool nvkms_opportunistic_display_sync(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@ -103,6 +103,8 @@ NvBool nvKmsKapiGetFunctionsTableInternal
struct NvKmsKapiFunctionsTable *funcsTable
);
void nvKmsKapiSuspendResume(NvBool suspend);
NvBool nvKmsGetBacklight(NvU32 display_id, void *drv_priv, NvU32 *brightness);
NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);

View File

@ -1,8 +1,13 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
@ -43,7 +48,9 @@
MODULE_AUTHOR("Yishai Hadas");
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
MODULE_LICENSE("Linux-OpenIB");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
enum {
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
@ -53,7 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
module_param(peerdirect_support, int, S_IRUGO);
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
/*
 * peer_trace(): debug-level tracing for nvidia-peermem, emitted via
 * printk(KERN_DEBUG) only when NV_MEM_DEBUG is defined at build time;
 * otherwise it expands to a no-op statement.
 */
#ifdef NV_MEM_DEBUG
#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
#else
#define peer_trace(FMT, ARGS...) do {} while (0)
#endif
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
@ -74,7 +87,10 @@ invalidate_peer_memory mem_invalidate_callback;
static void *reg_handle = NULL;
static void *reg_handle_nc = NULL;
/*
 * Guard value written into nv_mem_context pad1/pad2; checked later by
 * NV_MEM_CONTEXT_CHECK_OK() to detect a corrupted or stale context.
 */
#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)
struct nv_mem_context {
u64 pad1;
struct nvidia_p2p_page_table *page_table;
struct nvidia_p2p_dma_mapping *dma_mapping;
u64 core_context;
@ -86,8 +102,22 @@ struct nv_mem_context {
struct task_struct *callback_task;
int sg_allocated;
struct sg_table sg_head;
u64 pad2;
};
/*
 * Validate a struct nv_mem_context pointer: evaluates (as a GCC
 * statement expression) to non-zero when MC is non-NULL and both guard
 * words still equal NV_MEM_CONTEXT_MAGIC.  On failure it logs the
 * pointer and guard values via peer_trace() and evaluates to 0.
 * MC is evaluated once.
 */
#define NV_MEM_CONTEXT_CHECK_OK(MC) ({ \
    struct nv_mem_context *mc = (MC); \
    int rc = ((0 != mc) && \
              (READ_ONCE(mc->pad1) == NV_MEM_CONTEXT_MAGIC) && \
              (READ_ONCE(mc->pad2) == NV_MEM_CONTEXT_MAGIC)); \
    if (!rc) { \
        peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n", \
                   mc, \
                   mc?mc->pad1:0, \
                   mc?mc->pad2:0); \
    } \
    rc; \
})
static void nv_get_p2p_free_callback(void *data)
{
@ -97,8 +127,9 @@ static void nv_get_p2p_free_callback(void *data)
struct nvidia_p2p_dma_mapping *dma_mapping = NULL;
__module_get(THIS_MODULE);
if (!nv_mem_context) {
peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
peer_err("detected invalid context, skipping further processing\n");
goto out;
}
@ -169,9 +200,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
/* Error case handled as not mine */
return 0;
nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
@ -195,6 +228,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
return 1;
err:
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
/* Error case handled as not mine */
@ -347,6 +381,7 @@ static void nv_mem_release(void *context)
sg_free_table(&nv_mem_context->sg_head);
nv_mem_context->sg_allocated = 0;
}
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
module_put(THIS_MODULE);
return;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2022 NVIDIA Corporation
Copyright (c) 2013-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

View File

@ -82,12 +82,12 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
@ -116,3 +116,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup

View File

@ -25,7 +25,8 @@
#if !defined(NV_PRINTF_STRING_SECTION)
#if defined(NVRM) && NVOS_IS_LIBOS
#define NV_PRINTF_STRING_SECTION __attribute__ ((section (".logging")))
#include "libos_log.h"
#define NV_PRINTF_STRING_SECTION LIBOS_SECTION_LOGGING
#else // defined(NVRM) && NVOS_IS_LIBOS
#define NV_PRINTF_STRING_SECTION
#endif // defined(NVRM) && NVOS_IS_LIBOS
@ -33,7 +34,7 @@
/*
* Include nvstatuscodes.h twice. Once for creating constant strings in the
* the NV_PRINTF_STRING_SECTION section of the ececutable, and once to build
* the NV_PRINTF_STRING_SECTION section of the executable, and once to build
* the g_StatusCodeList table.
*/
#undef NV_STATUS_CODE

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1053,7 +1053,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
// are not used because unload_state_buf may be a managed memory pointer and
// therefore a locking assertion from the CPU fault handler could be fired.
nv_mmap_read_lock(current->mm);
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page);
nv_mmap_read_unlock(current->mm);
if (ret < 0)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2022 NVIDIA Corporation
Copyright (c) 2013-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -45,16 +45,20 @@
// #endif
// 3) Do the same thing for the function definition, and for any structs that
// are taken as arguments to these functions.
// 4) Let this change propagate over to cuda_a, so that the CUDA driver can
// start using the new API by bumping up the API version number its using.
// This can be found in gpgpu/cuda/cuda.nvmk.
// 5) Once the cuda_a changes have made it back into chips_a, remove the old API
// declaration, definition, and any old structs that were in use.
// 4) Let this change propagate over to cuda_a and dev_a, so that the CUDA and
// nvidia-cfg libraries can start using the new API by bumping up the API
// version number it's using.
// Places where UVM_API_REVISION is defined are:
// drivers/gpgpu/cuda/cuda.nvmk (cuda_a)
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (dev_a)
// 5) Once the dev_a and cuda_a changes have made it back into chips_a,
// remove the old API declaration, definition, and any old structs that were
// in use.
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 8
#define UVM_API_LATEST_REVISION 9
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@ -180,12 +184,8 @@ NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist);
// because it is not very informative.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(4)
NV_STATUS UvmInitialize(UvmFileDescriptor fd);
#else
NV_STATUS UvmInitialize(UvmFileDescriptor fd,
NvU64 flags);
#endif
//------------------------------------------------------------------------------
// UvmDeinitialize
@ -329,7 +329,11 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to register.
// UUID of the physical GPU to register.
//
// platformParams: (INPUT)
// User handles identifying the GPU partition to register.
// This should be NULL if the GPU is not SMC capable or SMC enabled.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@ -364,27 +368,31 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// OS state required to register the GPU is not present.
//
// NV_ERR_INVALID_STATE:
// OS state required to register the GPU is malformed.
// OS state required to register the GPU is malformed, or the partition
// identified by the user handles or its configuration changed.
//
// NV_ERR_GENERIC:
// Unexpected error. We try hard to avoid returning this error code,
// because it is not very informative.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(8)
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
#else
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid,
const UvmGpuPlatformParams *platformParams);
#endif
#if UVM_API_REV_IS_AT_MOST(8)
//------------------------------------------------------------------------------
// UvmRegisterGpuSmc
//
// The same as UvmRegisterGpu, but takes additional parameters to specify the
// GPU partition being registered if SMC is enabled.
//
// TODO: Bug 2844714: Merge UvmRegisterGpuSmc() with UvmRegisterGpu() once
// the initial SMC support is in place.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the parent GPU of the SMC partition to register.
// UUID of the physical GPU of the SMC partition to register.
//
// platformParams: (INPUT)
// User handles identifying the partition to register.
@ -397,6 +405,7 @@ NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
//
NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
const UvmGpuPlatformParams *platformParams);
#endif
//------------------------------------------------------------------------------
// UvmUnregisterGpu
@ -1416,8 +1425,7 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
// the CPU. This argument is ignored if the given virtual address range
// corresponds to managed memory.
// the CPU.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
@ -1456,16 +1464,10 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// pages were associated with a non-migratable range group.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(5)
NV_STATUS UvmMigrate(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid);
#else
NV_STATUS UvmMigrate(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid,
NvS32 preferredCpuMemoryNode);
#endif
//------------------------------------------------------------------------------
// UvmMigrateAsync
@ -1547,20 +1549,12 @@ NV_STATUS UvmMigrate(void *base,
// pages were associated with a non-migratable range group.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(5)
NV_STATUS UvmMigrateAsync(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid,
void *semaphoreAddress,
NvU32 semaphorePayload);
#else
NV_STATUS UvmMigrateAsync(void *base,
NvLength length,
const NvProcessorUuid *destinationUuid,
NvS32 preferredCpuMemoryNode,
void *semaphoreAddress,
NvU32 semaphorePayload);
#endif
//------------------------------------------------------------------------------
// UvmMigrateRangeGroup
@ -1568,9 +1562,7 @@ NV_STATUS UvmMigrateAsync(void *base,
// Migrates the backing of all virtual address ranges associated with the given
// range group to the specified destination processor. The behavior of this API
// is equivalent to calling UvmMigrate on each VA range associated with this
// range group. The value for the preferredCpuMemoryNode is irrelevant in this
// case as it only applies to migrations of pageable address, which cannot be
// used to create range groups.
// range group.
//
// Any errors encountered during migration are returned immediately. No attempt
// is made to migrate the remaining unmigrated ranges and the ranges that are
@ -2303,13 +2295,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// preferredLocationUuid: (INPUT)
// UUID of the preferred location.
//
// preferredCpuNumaNode: (INPUT)
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
// UUID of the CPU. -1 is a special value which indicates all CPU nodes
// allowed by the global and thread memory policies. This argument is
// ignored if preferredLocationUuid refers to a GPU or the given virtual
// address range corresponds to managed memory. If NUMA is not enabled,
// only 0 or -1 is allowed.
// allowed by the global and thread memory policies.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2339,10 +2328,11 @@ NV_STATUS UvmDisableReadDuplication(void *base,
//
// NV_ERR_INVALID_ARGUMENT:
// One of the following occured:
// - preferredLocationUuid is the UUID of a CPU and preferredCpuNumaNode
// refers to a registered GPU.
// - preferredCpuNumaNode is invalid and preferredLocationUuid is the
// UUID of the CPU.
// - preferredLocationUuid is the UUID of the CPU and
// preferredCpuMemoryNode is either:
// - not a valid NUMA node,
// - not a possible NUMA node, or
// - a NUMA node ID corresponding to a registered GPU.
//
// NV_ERR_NOT_SUPPORTED:
// The UVM file descriptor is associated with another process and the
@ -2353,16 +2343,10 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// because it is not very informative.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(7)
NV_STATUS UvmSetPreferredLocation(void *base,
NvLength length,
const NvProcessorUuid *preferredLocationUuid);
#else
NV_STATUS UvmSetPreferredLocation(void *base,
NvLength length,
const NvProcessorUuid *preferredLocationUuid,
NvS32 preferredCpuNumaNode);
#endif
NvS32 preferredCpuMemoryNode);
//------------------------------------------------------------------------------
// UvmUnsetPreferredLocation

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -79,6 +79,8 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@ -94,4 +96,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-20221 NVIDIA Corporation
Copyright (c) 2018-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -38,10 +38,12 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
@ -53,7 +55,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = 136 * UVM_SIZE_1TB;
parent_gpu->flat_vidmem_va_base = 160 * UVM_SIZE_1TB;
parent_gpu->flat_sysmem_va_base = 256 * UVM_SIZE_1TB;
parent_gpu->ce_phys_vidmem_write_supported = true;
@ -81,6 +83,8 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@ -101,4 +105,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2022 NVIDIA Corporation
Copyright (c) 2018-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -117,7 +117,7 @@ bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t
NvU64 push_begin_gpu_va;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
if (uvm_channel_is_proxy(push->channel)) {
@ -196,7 +196,7 @@ bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
if (uvm_channel_is_proxy(push->channel)) {

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2022 NVIDIA Corporation
Copyright (c) 2018-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,7 +33,7 @@ bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address,
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
if (!uvm_parent_gpu_is_virt_mode_sriov_heavy(gpu->parent))
return true;
if (uvm_channel_is_privileged(push->channel)) {

View File

@ -34,19 +34,32 @@
#include <linux/hmm.h>
#endif
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
typedef enum
{
UVM_ATS_SERVICE_TYPE_FAULTS = 0,
UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
UVM_ATS_SERVICE_TYPE_COUNT
} uvm_ats_service_type_t;
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 start,
size_t length,
uvm_fault_access_type_t access_type,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
struct mm_struct *mm = va_space->va_space_mm.mm;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
NV_STATUS status;
NvU64 user_space_start;
NvU64 user_space_length;
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
uvm_populate_permissions_t populate_permissions = fault_service_type ?
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
UVM_POPULATE_PERMISSIONS_INHERIT;
// Request uvm_migrate_pageable() to touch the corresponding page after
// population.
@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
.dst_node_id = ats_context->residency_node,
.start = start,
.length = length,
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
.touch = true,
.skip_mapped = true,
.populate_on_cpu_alloc_failures = true,
.populate_permissions = populate_permissions,
.touch = fault_service_type,
.skip_mapped = fault_service_type,
.populate_on_cpu_alloc_failures = fault_service_type,
.user_space_start = &user_space_start,
.user_space_length = &user_space_length,
};
@ -107,26 +120,24 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
{
uvm_ats_fault_invalidate_t *ats_invalidate;
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, addr, size);
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
ats_invalidate->tlb_batch_pending = true;
}
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
}
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
@ -192,7 +203,7 @@ done:
ats_context->prefetch_state.has_preferred_location = false;
#endif
ats_context->residency_id = gpu ? gpu->parent->id : UVM_ID_CPU;
ats_context->residency_id = gpu ? gpu->id : UVM_ID_CPU;
ats_context->residency_node = residency;
}
@ -364,51 +375,43 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
uvm_ats_fault_context_t *ats_context,
uvm_va_block_region_t max_prefetch_region,
uvm_page_mask_t *faulted_mask)
uvm_va_block_region_t max_prefetch_region)
{
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
if (uvm_page_mask_empty(faulted_mask))
if (uvm_page_mask_empty(accessed_mask))
return;
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
faulted_mask,
uvm_va_block_region_from_mask(NULL, faulted_mask),
accessed_mask,
uvm_va_block_region_from_mask(NULL, accessed_mask),
max_prefetch_region,
residency_mask,
bitmap_tree,
prefetch_mask);
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
}
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(faulted_mask))
if (uvm_page_mask_empty(accessed_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
@ -418,19 +421,27 @@ static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
ats_context->prefetch_state.first_touch &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
}
else {
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
}
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
if (vma->vm_flags & VM_WRITE)
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
return status;
}
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
else {
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
}
return status;
}
@ -448,6 +459,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_fault_client_type_t client_type = ats_context->client_type;
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@ -456,6 +468,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
uvm_page_mask_zero(faults_serviced_mask);
uvm_page_mask_zero(reads_serviced_mask);
@ -481,7 +496,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
NvU64 start = base + (subregion.first * PAGE_SIZE);
@ -493,12 +508,13 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
if (vma->vm_flags & VM_WRITE) {
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
// The Linux kernel never invalidates TLB entries on mapping
// permission upgrade. This is a problem if the GPU has cached
@ -509,7 +525,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
// infinite loop because we just forward the fault to the Linux
// kernel and it will see that the permissions in the page table are
// correct. Therefore, we flush TLB entries on ATS write faults.
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
else {
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
@ -527,11 +543,20 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
// Similarly to permission upgrade scenario, discussed above, GPU
// will not re-fetch the entry if the PTE is invalid and page size
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
// new translation written explicitly like in the case of permission
// upgrade.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
return status;
@ -566,7 +591,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
NV_STATUS status;
uvm_push_t push;
if (!ats_invalidate->write_faults_in_batch)
if (!ats_invalidate->tlb_batch_pending)
return NV_OK;
UVM_ASSERT(gpu_va_space);
@ -578,7 +603,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
"Invalidate ATS entries");
if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);
// Add this push to the GPU's tracker so that fault replays/clears can
@ -586,7 +611,47 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
status = uvm_tracker_add_push_safe(out_tracker, &push);
}
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
return status;
}
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context)
{
uvm_va_block_region_t subregion;
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
UVM_ASSERT(vma);
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
UVM_ASSERT(g_uvm_global.ats.enabled);
UVM_ASSERT(gpu_va_space);
UVM_ASSERT(gpu_va_space->ats.enabled);
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
uvm_assert_mmap_lock_locked(vma->vm_mm);
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
ats_batch_select_residency(gpu_va_space, vma, ats_context);
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
UVM_ASSERT(start >= vma->vm_start);
UVM_ASSERT((start + length) <= vma->vm_end);
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
if (status != NV_OK)
return status;
}
return NV_OK;
}

View File

@ -42,17 +42,37 @@
// corresponding bit in read_fault_mask. These returned masks are only valid if
// the return status is NV_OK. Status other than NV_OK indicate system global
// fault servicing failures.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
// Service access counter notifications on ATS regions in the range (base, base
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
// The caller is responsible for ensuring that the addresses in the
// accessed_mask is completely covered by the VMA. The caller is also
// responsible for handling any errors returned by this function.
//
// Returns NV_OK if servicing was successful. Any other error indicates an error
// while servicing the range.
//
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
// lock.
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma,
NvU64 base,
uvm_ats_fault_context_t *ats_context);
// Return whether there are any VA ranges (and thus GMMU mappings) within the
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
// ats_invalidate->tlb_batch_pending flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);

View File

@ -30,6 +30,7 @@
#include "uvm_va_space_mm.h"
#include <asm/io.h>
#include <linux/log2.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include <linux/acpi.h>
@ -50,6 +51,12 @@
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
#endif
// Type to represent a 128-bit SMMU command queue command.
struct smmu_cmd {
NvU64 low;
NvU64 high;
};
// Base address of SMMU CMDQ-V for GSMMU0.
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
#define SMMU_CMDQV_BASE_LEN 0x00830000
@ -101,9 +108,9 @@
// Base address offset for the VCMDQ registers.
#define SMMU_VCMDQ_CMDQ_BASE 0x10000
// Size of the command queue. Each command is 8 bytes and we can't
// have a command queue greater than one page.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE 9
// Size of the command queue. Each command is 16 bytes and we can't
// have a command queue greater than one page in size.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)
// We always use VINTF63 for the WAR
@ -175,7 +182,6 @@ static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
BUILD_BUG_ON((SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 3) > PAGE_SHIFT);
smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);

View File

@ -53,10 +53,11 @@
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
// kernel not issuing SMMU TLB invalidates on read-only
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
// If NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS is defined it
// means the upstream fix is in place so no need for the WAR from
// Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only
#if defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#elif NVCPU_IS_AARCH64
#define UVM_ATS_SMMU_WAR_REQUIRED() 1

View File

@ -56,7 +56,7 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
@ -176,7 +176,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
@ -411,10 +411,11 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
size_t i, j, k, s;
uvm_mem_alloc_params_t mem_params = {0};
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, &verif_mem), done);
else
TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, &verif_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, gpu), done);
gpu_verif_addr = uvm_mem_gpu_address_virtual_kernel(verif_mem, gpu);
@ -436,7 +437,7 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
if (uvm_conf_computing_mode_enabled(gpu)) {
if (g_uvm_global.conf_computing_enabled) {
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
@ -559,7 +560,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
@ -611,7 +612,7 @@ static NV_STATUS test_semaphore_release(uvm_gpu_t *gpu)
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
@ -665,7 +666,7 @@ static NV_STATUS test_semaphore_timestamp(uvm_gpu_t *gpu)
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = test_semaphore_alloc_sem(gpu, size, &mem);
@ -1153,7 +1154,7 @@ static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu,
} small_sizes[] = {{1, 1}, {3, 1}, {8, 1}, {2, 2}, {8, 4}, {UVM_PAGE_SIZE_4K - 8, 8}, {UVM_PAGE_SIZE_4K + 8, 8}};
// Only Confidential Computing uses CE encryption/decryption
if (!uvm_conf_computing_mode_enabled(gpu))
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
// Use a size, and copy size, that are not a multiple of common page sizes.

View File

@ -83,7 +83,7 @@ bool uvm_channel_pool_uses_mutex(uvm_channel_pool_t *pool)
// submission uses UVM_SPIN_LOOP, which can call 'schedule', to wait for
// LCIC completion. Indirect submission is synchronous, calling
// uvm_push_wait which again uses UVM_SPIN_LOOP.
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
if (g_uvm_global.conf_computing_enabled)
return true;
// Unless the mutex is required, the spinlock is preferred when work
@ -95,7 +95,7 @@ static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
uvm_lock_order_t order = UVM_LOCK_ORDER_CHANNEL;
if (uvm_conf_computing_mode_enabled(pool->manager->gpu) && uvm_channel_pool_is_wlc(pool))
if (g_uvm_global.conf_computing_enabled && uvm_channel_pool_is_wlc(pool))
order = UVM_LOCK_ORDER_WLC_CHANNEL;
if (uvm_channel_pool_uses_mutex(pool))
@ -137,7 +137,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
// Completed value should never exceed the queued value
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
channel->pool->manager->gpu->parent->name,
uvm_gpu_name(uvm_channel_get_gpu(channel)),
channel->name,
completed_value,
channel->tracking_sem.queued_value);
@ -273,9 +273,8 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
static void unlock_channel_for_push(uvm_channel_t *channel)
{
NvU32 index;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (!uvm_conf_computing_mode_enabled(gpu))
if (!g_uvm_global.conf_computing_enabled)
return;
index = uvm_channel_index_in_pool(channel);
@ -287,25 +286,22 @@ static void unlock_channel_for_push(uvm_channel_t *channel)
uvm_up_out_of_order(&channel->pool->push_sem);
}
static bool is_channel_locked_for_push(uvm_channel_t *channel)
bool uvm_channel_is_locked_for_push(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return test_bit(uvm_channel_index_in_pool(channel), channel->pool->push_locks);
// For CE and proxy channels, we always return that the channel is locked,
// which has no functional impact in the UVM channel code-flow, this is only
// used on UVM_ASSERTs.
// used in UVM_ASSERTs.
return true;
}
static void lock_channel_for_push(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
uvm_channel_pool_assert_locked(channel->pool);
UVM_ASSERT(!test_bit(index, channel->pool->push_locks));
@ -314,10 +310,9 @@ static void lock_channel_for_push(uvm_channel_t *channel)
static bool test_claim_and_lock_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NvU32 index = uvm_channel_index_in_pool(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
uvm_channel_pool_assert_locked(channel->pool);
if (!test_bit(index, channel->pool->push_locks) && try_claim_channel_locked(channel, num_gpfifo_entries)) {
@ -337,7 +332,7 @@ static NV_STATUS channel_reserve_and_lock_in_pool(uvm_channel_pool_t *pool, uvm_
NvU32 index;
UVM_ASSERT(pool);
UVM_ASSERT(uvm_conf_computing_mode_enabled(pool->manager->gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
// This semaphore is uvm_up() in unlock_channel_for_push() as part of the
// uvm_channel_end_push() routine.
@ -399,7 +394,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
UVM_ASSERT(pool);
if (uvm_conf_computing_mode_enabled(pool->manager->gpu))
if (g_uvm_global.conf_computing_enabled)
return channel_reserve_and_lock_in_pool(pool, channel_out);
uvm_for_each_channel_in_pool(channel, pool) {
@ -509,7 +504,7 @@ static void channel_semaphore_gpu_encrypt_payload(uvm_push_t *push, NvU64 semaph
NvU32 payload_size = sizeof(*semaphore->payload);
NvU32 *last_pushed_notifier = &semaphore->conf_computing.last_pushed_notifier;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
encrypted_payload_gpu_va = uvm_rm_mem_get_gpu_va(semaphore->conf_computing.encrypted_payload, gpu, false);
@ -540,29 +535,97 @@ static void push_reserve_csl_sign_buf(uvm_push_t *push)
UVM_ASSERT((buf - UVM_METHOD_SIZE / sizeof(*buf)) == push->begin);
}
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
{
unsigned index;
uvm_channel_pool_t *paired_pool;
uvm_channel_type_t paired_channel_type;
UVM_ASSERT(channel);
UVM_ASSERT(uvm_channel_is_wlc(channel) || uvm_channel_is_lcic(channel));
index = uvm_channel_index_in_pool(channel);
paired_channel_type = uvm_channel_is_wlc(channel) ? UVM_CHANNEL_TYPE_LCIC : UVM_CHANNEL_TYPE_WLC;
paired_pool = channel->pool->manager->pool_to_use.default_for_type[paired_channel_type];
return paired_pool->channels + index;
}
uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel)
{
UVM_ASSERT(lcic_channel);
UVM_ASSERT(uvm_channel_is_lcic(lcic_channel));
return get_paired_channel(lcic_channel);
}
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel)
{
UVM_ASSERT(wlc_channel);
UVM_ASSERT(uvm_channel_is_wlc(wlc_channel));
return get_paired_channel(wlc_channel);
}
static NV_STATUS channel_rotate_and_reserve_launch_channel(uvm_channel_t *channel, uvm_channel_t **launch_channel)
{
uvm_channel_manager_t *manager = channel->pool->manager;
NV_STATUS status;
status = uvm_conf_computing_maybe_rotate_channel_ivs(channel);
if (status != NV_OK)
return status;
// CE channels, other than WLC fix launch schedule setup, need a launch
// channel that needs to be reserved
if (uvm_channel_is_ce(channel) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
uvm_channel_t *local_launch_channel = NULL;
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
status = uvm_channel_reserve_type(manager, indirect_channel_type, &local_launch_channel);
if (status != NV_OK)
return status;
// Indirect launch relies on pre-allocated resources to avoid failure
// paths. This includes pre-allocating IV space. There's no way to
// undo the launch channel reservation, so just return an error.
status = uvm_conf_computing_maybe_rotate_channel_ivs(local_launch_channel);
if (status != NV_OK) {
uvm_channel_release(local_launch_channel, 1);
return status;
}
if (uvm_channel_is_wlc(local_launch_channel)) {
status = uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_wlc_get_paired_lcic(local_launch_channel));
if (status != NV_OK) {
uvm_channel_release(local_launch_channel, 1);
return status;
}
}
*launch_channel = local_launch_channel;
}
return NV_OK;
}
NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
{
NV_STATUS status;
NV_STATUS status = NV_OK;
uvm_channel_manager_t *manager;
uvm_gpu_t *gpu;
UVM_ASSERT(channel);
UVM_ASSERT(push);
manager = channel->pool->manager;
gpu = uvm_channel_get_gpu(channel);
// Only SEC2 and WLC with set up fixed schedule can use direct push
// submission. All other cases (including WLC pre-schedule) need to
// reserve a launch channel that will be used to submit this push
// indirectly.
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel) &&
!(uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(manager))) {
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
status = uvm_channel_reserve_type(manager, indirect_channel_type, &push->launch_channel);
if (g_uvm_global.conf_computing_enabled) {
status = channel_rotate_and_reserve_launch_channel(channel, &push->launch_channel);
if (status != NV_OK)
return status;
}
@ -570,7 +633,7 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
// When the Confidential Computing feature is enabled, the channel's lock
// should have already been acquired in uvm_channel_reserve() or
// channel_reserve_and_lock_in_pool().
UVM_ASSERT(is_channel_locked_for_push(channel));
UVM_ASSERT(uvm_channel_is_locked_for_push(channel));
push->channel = channel;
push->channel_tracking_value = 0;
@ -603,7 +666,7 @@ static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU3
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + channel->cpu_put;
pushbuffer_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);
if (uvm_conf_computing_mode_enabled(gpu)) {
if (g_uvm_global.conf_computing_enabled) {
void *unprotected_pb = uvm_pushbuffer_get_unprotected_cpu_va_for_push(pushbuffer, push);
UVM_ASSERT(uvm_channel_is_sec2(channel));
@ -674,45 +737,14 @@ static void uvm_channel_tracking_semaphore_release(uvm_push_t *push, NvU64 semap
// needs to be scheduled to get an encrypted shadow copy in unprotected
// sysmem. This allows UVM to later decrypt it and observe the new
// semaphore value.
if (uvm_conf_computing_mode_enabled(push->gpu) && uvm_channel_is_ce(push->channel))
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(push->channel))
channel_semaphore_gpu_encrypt_payload(push, semaphore_va);
}
static uvm_channel_t *get_paired_channel(uvm_channel_t *channel)
{
unsigned index;
uvm_channel_pool_t *paired_pool;
uvm_channel_type_t paired_channel_type;
UVM_ASSERT(channel);
UVM_ASSERT(uvm_channel_is_wlc(channel) || uvm_channel_is_lcic(channel));
index = uvm_channel_index_in_pool(channel);
paired_channel_type = uvm_channel_is_wlc(channel) ? UVM_CHANNEL_TYPE_LCIC : UVM_CHANNEL_TYPE_WLC;
paired_pool = channel->pool->manager->pool_to_use.default_for_type[paired_channel_type];
return paired_pool->channels + index;
}
static uvm_channel_t *wlc_get_paired_lcic(uvm_channel_t *wlc_channel)
{
UVM_ASSERT(wlc_channel);
UVM_ASSERT(uvm_channel_is_wlc(wlc_channel));
return get_paired_channel(wlc_channel);
}
static uvm_channel_t *lcic_get_paired_wlc(uvm_channel_t *lcic_channel)
{
UVM_ASSERT(lcic_channel);
UVM_ASSERT(uvm_channel_is_lcic(lcic_channel));
return get_paired_channel(lcic_channel);
}
static void internal_channel_submit_work_wlc(uvm_push_t *push)
{
uvm_channel_t *wlc_channel = push->channel;
uvm_channel_t *lcic_channel = wlc_get_paired_lcic(wlc_channel);
uvm_channel_t *lcic_channel = uvm_channel_wlc_get_paired_lcic(wlc_channel);
UvmCslIv *iv_cpu_addr = lcic_channel->tracking_sem.semaphore.conf_computing.ivs;
NvU32 *last_pushed_notifier;
NvU32 iv_index;
@ -926,7 +958,7 @@ static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, N
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
// Reconstruct the previous GPFIFO entry. UVM_GPFIFO_SYNC_WAIT is
// used only in static WLC schedule.
// Overwriting the previous entry with the same value doesn't hurt,
// whether the previous entry has been processed or not
@ -1053,7 +1085,7 @@ static void encrypt_push(uvm_push_t *push)
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
unsigned auth_tag_offset = UVM_CONF_COMPUTING_AUTH_TAG_SIZE * push->push_info_index;
if (!uvm_conf_computing_mode_enabled(gpu))
if (!g_uvm_global.conf_computing_enabled)
return;
if (!push_info->on_complete)
@ -1111,7 +1143,7 @@ void uvm_channel_end_push(uvm_push_t *push)
uvm_channel_tracking_semaphore_release(push, semaphore_va, new_payload);
if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
uvm_channel_t *paired_lcic = wlc_get_paired_lcic(channel);
uvm_channel_t *paired_lcic = uvm_channel_wlc_get_paired_lcic(channel);
gpu->parent->ce_hal->semaphore_reduction_inc(push,
paired_lcic->channel_info.gpPutGpuVa,
@ -1125,7 +1157,7 @@ void uvm_channel_end_push(uvm_push_t *push)
// The UVM_MAX_WLC_PUSH_SIZE is set to fit indirect work launch
// pushes. However, direct pushes to WLC can be smaller than this
// size. This is used e.g. by indirect submission of control
// gpfifo entries.
// GPFIFO entries.
gpu->parent->host_hal->noop(push, UVM_MAX_WLC_PUSH_SIZE - uvm_push_get_size(push));
}
}
@ -1144,8 +1176,9 @@ void uvm_channel_end_push(uvm_push_t *push)
// Indirect submission via SEC2/WLC needs pushes to be aligned for
// encryption/decryption. The pushbuffer_size of this push
// influences starting address of the next push.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
entry->pushbuffer_size = UVM_ALIGN_UP(push_size, UVM_CONF_COMPUTING_BUF_ALIGNMENT);
entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
@ -1158,7 +1191,7 @@ void uvm_channel_end_push(uvm_push_t *push)
else if (uvm_channel_is_wlc(channel) && uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_wlc(push);
}
else if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel)) {
else if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
@ -1209,7 +1242,7 @@ static void submit_ctrl_gpfifo(uvm_channel_t *channel, uvm_gpfifo_entry_t *entry
UVM_ASSERT(entry == &channel->gpfifo_entries[cpu_put]);
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel))
return;
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
@ -1291,8 +1324,6 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
uvm_gpfifo_entry_t *entry;
NvU32 cpu_put;
NvU32 new_cpu_put;
bool needs_indirect_submit = false;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
channel_pool_lock(channel->pool);
@ -1315,8 +1346,6 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
--channel->current_gpfifo_count;
submit_ctrl_gpfifo(channel, entry, new_cpu_put);
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
needs_indirect_submit = true;
channel->cpu_put = new_cpu_put;
@ -1327,7 +1356,8 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
// semaphore release, where the channel is unlocked.
channel_pool_unlock(channel->pool);
if (needs_indirect_submit) {
// Trigger indirect submission when needed.
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel)) {
NV_STATUS status = submit_ctrl_gpfifo_indirect(channel, entry, cpu_put, new_cpu_put);
// All failures are globally fatal. There's nothing we do to recover.
@ -1344,12 +1374,11 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
{
NV_STATUS status;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_push_t push;
UVM_ASSERT(!uvm_channel_is_proxy(channel));
// WLC/LCIC channels can only process custom gpfifo entries before
// WLC/LCIC channels can only process custom GPFIFO entries before
// their schedule is set up.
UVM_ASSERT(!uvm_channel_is_lcic(channel) || !uvm_channel_manager_is_wlc_ready(channel->pool->manager));
UVM_ASSERT(!uvm_channel_is_wlc(channel) || !uvm_channel_manager_is_wlc_ready(channel->pool->manager));
@ -1373,10 +1402,28 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
if (status != NV_OK)
return status;
if (g_uvm_global.conf_computing_enabled) {
// Rotating IV needs to idle the channel. However, there's no semaphore
// release after submitting a control entry. It is not possible to wait
// for in-flight entries after the GPFIFO submission.
// Instead, check for IV rotation early. Secure channels are locked for
// pushes after reservation so the IV space gained here can't be used
// up by concurrent pushes.
status = uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(channel);
if (status != NV_OK) {
uvm_channel_release(channel, 2);
return status;
}
}
write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);
status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
if (status != NV_OK) {
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
// One entry was consumed by GPFIFO entry
uvm_channel_release(channel, 1);
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
@ -1440,9 +1487,8 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
NV_STATUS status = NV_OK;
uvm_spin_loop_t spin;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return channel_reserve_and_lock(channel, num_gpfifo_entries);
if (try_claim_channel(channel, num_gpfifo_entries))
@ -1460,6 +1506,18 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
return status;
}
void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
channel_pool_lock(channel->pool);
UVM_ASSERT(uvm_channel_is_locked_for_push(channel));
unlock_channel_for_push(channel);
UVM_ASSERT(channel->current_gpfifo_count >= num_gpfifo_entries);
channel->current_gpfifo_count -= num_gpfifo_entries;
channel_pool_unlock(channel->pool);
}
// Get the first pending GPFIFO entry, if any.
// This doesn't stop the entry from being reused.
static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *channel)
@ -1580,35 +1638,55 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
}
static NV_STATUS csl_init(uvm_channel_t *channel)
NV_STATUS uvm_channel_wait(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status = uvm_global_get_status();
uvm_spin_loop_t spin;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
if (uvm_channel_update_progress(channel) == 0 && status == NV_OK)
return uvm_channel_check_errors(channel);
uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_loop_init(&spin);
while (uvm_channel_update_progress(channel) > 0 && status == NV_OK) {
UVM_SPIN_LOOP(&spin);
status = uvm_global_get_status();
status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
if (status == NV_OK) {
channel->csl.is_ctx_initialized = true;
}
else {
UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
if (status == NV_OK)
status = uvm_channel_check_errors(channel);
}
return status;
}
// Initialize the CSL (encryption) context of a channel.
//
// Only valid when Confidential Computing is enabled. On success the context
// lock is initialized and the channel is marked as having a live CSL context
// so that csl_destroy() knows to tear it down.
static NV_STATUS csl_init(uvm_channel_t *channel)
{
    NV_STATUS status;

    UVM_ASSERT(g_uvm_global.conf_computing_enabled);

    status = uvm_rm_locked_call(nvUvmInterfaceCslInitContext(&channel->csl.ctx, channel->handle));
    if (status == NV_OK) {
        uvm_mutex_init(&channel->csl.ctx_lock, UVM_LOCK_ORDER_CSL_CTX);
        channel->csl.is_ctx_initialized = true;
        return NV_OK;
    }

    UVM_DBG_PRINT("nvUvmInterfaceCslInitContext() failed: %s, GPU %s\n",
                  nvstatusToString(status),
                  uvm_gpu_name(uvm_channel_get_gpu(channel)));
    return status;
}
static void csl_destroy(uvm_channel_t *channel)
{
if (!channel->csl.is_ctx_initialized)
return;
uvm_assert_mutex_unlocked(&channel->csl.ctx_lock);
UVM_ASSERT(!is_channel_locked_for_push(channel));
UVM_ASSERT(!uvm_channel_is_locked_for_push(channel));
uvm_rm_locked_call_void(nvUvmInterfaceDeinitCslContext(&channel->csl.ctx));
channel->csl.is_ctx_initialized = false;
@ -1616,9 +1694,7 @@ static void csl_destroy(uvm_channel_t *channel)
static void free_conf_computing_buffers(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
uvm_rm_mem_free(channel->conf_computing.static_pb_protected_vidmem);
@ -1650,7 +1726,7 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
@ -1770,9 +1846,8 @@ static NV_STATUS alloc_conf_computing_buffers_lcic(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
{
NV_STATUS status;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
status = alloc_conf_computing_buffers_semaphore(channel);
@ -1786,6 +1861,7 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
status = alloc_conf_computing_buffers_lcic(channel);
}
else {
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
void *push_crypto_bundles = uvm_kvmalloc_zero(sizeof(*channel->conf_computing.push_crypto_bundles) *
channel->num_gpfifo_entries);
@ -1806,8 +1882,6 @@ static NV_STATUS alloc_conf_computing_buffers(uvm_channel_t *channel)
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(pool->num_channels > 0);
if (channel->tracking_sem.queued_value > 0) {
@ -1831,7 +1905,7 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
uvm_kvfree(channel->gpfifo_entries);
if (uvm_conf_computing_mode_enabled(gpu)) {
if (g_uvm_global.conf_computing_enabled) {
csl_destroy(channel);
if (uvm_channel_is_ce(channel))
@ -1889,7 +1963,7 @@ static uvmGpuTsgHandle channel_get_tsg(uvm_channel_t *channel)
if (uvm_channel_pool_is_wlc(pool) || uvm_channel_pool_is_lcic(pool)) {
if (uvm_channel_pool_is_lcic(pool)) {
channel = lcic_get_paired_wlc(channel);
channel = uvm_channel_lcic_get_paired_wlc(channel);
pool = channel->pool;
}
@ -1906,7 +1980,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
UvmGpuChannelAllocParams channel_alloc_params;
UvmGpuChannelInfo *channel_info = &channel->channel_info;
uvm_channel_manager_t *manager = channel->pool->manager;
uvm_gpu_t *gpu = manager->gpu;
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
channel_alloc_params.numGpFifoEntries = channel_pool_type_num_gpfifo_entries(manager, channel->pool->pool_type);
@ -1914,7 +1987,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
if (uvm_channel_is_sec2(channel)) {
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
// SEC2 channels' GPPUT and GPFIFO must be allocated in sysmem.
channel_alloc_params.gpFifoLoc = UVM_BUFFER_LOCATION_SYS;
@ -1928,7 +2001,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel)
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceChannelAllocate() failed: %s, GPU %s, type %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
uvm_gpu_name(manager->gpu),
uvm_channel_pool_type_to_string(channel->pool->pool_type));
return status;
}
@ -1994,7 +2067,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
channel->tools.pending_event_count = 0;
INIT_LIST_HEAD(&channel->tools.channel_list_node);
if (uvm_conf_computing_mode_enabled(gpu) && uvm_channel_is_ce(channel))
if (g_uvm_global.conf_computing_enabled && uvm_channel_is_ce(channel))
semaphore_pool = gpu->secure_semaphore_pool;
status = uvm_gpu_tracking_semaphore_alloc(semaphore_pool, &channel->tracking_sem);
@ -2020,7 +2093,7 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
goto error;
}
if (uvm_conf_computing_mode_enabled(gpu)) {
if (g_uvm_global.conf_computing_enabled) {
status = csl_init(channel);
if (status != NV_OK)
goto error;
@ -2079,14 +2152,14 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
NV_STATUS status;
NvU32 num_entries = 1;
if (uvm_gpu_has_pushbuffer_segments(gpu))
if (uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
num_entries++;
status = uvm_channel_reserve(channel, num_entries);
if (status != NV_OK)
return status;
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
if (uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent)) {
NvU64 gpfifo_entry;
uvm_pushbuffer_t *pushbuffer = uvm_channel_get_pushbuffer(channel);
NvU64 pb_base = uvm_pushbuffer_get_gpu_va_base(pushbuffer);
@ -2102,6 +2175,10 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
if (status != NV_OK) {
// One entry was consumed by control GPFIFO entry above, release the
// second one.
uvm_channel_release(channel, 1);
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
@ -2126,7 +2203,7 @@ static NV_STATUS channel_init(uvm_channel_t *channel)
static bool channel_manager_uses_proxy_pool(uvm_channel_manager_t *manager)
{
return uvm_gpu_is_virt_mode_sriov_heavy(manager->gpu);
return uvm_parent_gpu_is_virt_mode_sriov_heavy(manager->gpu->parent);
}
// Number of channels to create in a pool of the given type.
@ -2266,7 +2343,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
num_channels = channel_pool_type_num_channels(pool_type);
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
if (uvm_conf_computing_mode_enabled(channel_manager->gpu)) {
if (g_uvm_global.conf_computing_enabled) {
// Use different order lock for SEC2 and WLC channels.
// This allows reserving a SEC2 or WLC channel for indirect work
// submission while holding a reservation for a channel.
@ -2721,11 +2798,11 @@ static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
// CE proxy channel pool.
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
if (uvm_parent_gpu_needs_proxy_channel_pool(manager->gpu->parent))
num_channel_pools++;
// SEC2 pool, WLC pool, LCIC pool
if (uvm_conf_computing_mode_enabled(manager->gpu))
if (g_uvm_global.conf_computing_enabled)
num_channel_pools += 3;
return num_channel_pools;
@ -3093,7 +3170,7 @@ static NV_STATUS channel_manager_create_conf_computing_pools(uvm_channel_manager
uvm_channel_pool_t *wlc_pool = NULL;
uvm_channel_pool_t *lcic_pool = NULL;
if (!uvm_conf_computing_mode_enabled(manager->gpu))
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_rm_mem_alloc(manager->gpu,
@ -3173,7 +3250,7 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
// In SR-IOV heavy, add an additional, single-channel, pool that is
// dedicated to the MEMOPS type.
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
if (uvm_parent_gpu_needs_proxy_channel_pool(manager->gpu->parent)) {
uvm_channel_pool_t *proxy_pool = NULL;
uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
@ -3295,7 +3372,7 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
bool uvm_channel_is_privileged(uvm_channel_t *channel)
{
if (uvm_gpu_is_virt_mode_sriov_heavy(uvm_channel_get_gpu(channel)))
if (uvm_parent_gpu_is_virt_mode_sriov_heavy(uvm_channel_get_gpu(channel)->parent))
return uvm_channel_is_proxy(channel);
return true;

View File

@ -497,6 +497,10 @@ static bool uvm_channel_is_lcic(uvm_channel_t *channel)
return uvm_channel_pool_is_lcic(channel->pool);
}
uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
@ -603,6 +607,11 @@ bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value);
// Update and get the latest completed value by the channel
NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel);
// Wait for the channel to idle.
// Waits for all currently running work to complete, but does not prevent new
// work from being submitted while waiting.
NV_STATUS uvm_channel_wait(uvm_channel_t *channel);
// Select and reserve a channel with the specified type for a push
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager,
uvm_channel_type_t type,
@ -617,6 +626,9 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
// Reserve a specific channel for a push or for a control GPFIFO entry.
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
// Release reservation on a specific channel
void uvm_channel_release(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
// Set optimal CE for P2P transfers between manager->gpu and peer
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
@ -648,6 +660,8 @@ NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
bool uvm_channel_is_locked_for_push(uvm_channel_t *channel);
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
{
return channel->pool->manager->gpu;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -24,6 +24,7 @@
#include "uvm_global.h"
#include "uvm_channel.h"
#include "uvm_hal.h"
#include "uvm_mem.h"
#include "uvm_push.h"
#include "uvm_test.h"
#include "uvm_test_rng.h"
@ -57,14 +58,14 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
const NvU32 values_count = iters_per_channel_type_per_gpu;
const size_t buffer_size = sizeof(NvU32) * values_count;
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
@ -84,7 +85,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), done);
exclude_proxy_channel_type = uvm_gpu_uses_proxy_channel_pool(gpu);
exclude_proxy_channel_type = uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent);
for (i = 0; i < iters_per_channel_type_per_gpu; ++i) {
for (j = 0; j < UVM_CHANNEL_TYPE_CE_COUNT; ++j) {
@ -222,7 +223,7 @@ static NV_STATUS uvm_test_rc_for_gpu(uvm_gpu_t *gpu)
// Check RC on a proxy channel (SR-IOV heavy) or internal channel (any other
// mode). It is not allowed to use a virtual address in a memset pushed to
// a proxy channel, so we use a physical address instead.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
uvm_gpu_address_t dst_address;
// Save the line number the push that's supposed to fail was started on
@ -314,6 +315,110 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
return NV_OK;
}
// Verify that a CE write to sysmem whose IOMMU mapping has been torn down
// triggers an RC error instead of silently corrupting memory.
//
// The test memsets an IOMMU-mapped sysmem buffer (expected to succeed), unmaps
// it, then memsets the same DMA address again and expects NV_ERR_RC_ERROR on
// the channel and a fatal global error. Compiled out unless the kernel
// provides iommu_is_dma_domain() and defaults to strict DMA invalidation.
static NV_STATUS uvm_test_iommu_rc_for_gpu(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;

#if defined(NV_IOMMU_IS_DMA_DOMAIN_PRESENT) && defined(CONFIG_IOMMU_DEFAULT_DMA_STRICT)
    // This test needs the DMA API to immediately invalidate IOMMU mappings on
    // DMA unmap (as opposed to lazy invalidation). The policy can be changed
    // on boot (e.g. iommu.strict=1), but there isn't a good way to check for
    // the runtime setting. CONFIG_IOMMU_DEFAULT_DMA_STRICT checks for the
    // default value.

    uvm_push_t push;
    uvm_mem_t *sysmem;
    uvm_gpu_address_t sysmem_dma_addr;
    char *cpu_ptr = NULL;
    const size_t data_size = PAGE_SIZE;
    size_t i;

    struct device *dev = &gpu->parent->pci_dev->dev;
    struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

    // Check that the iommu domain is controlled by linux DMA API
    if (!domain || !iommu_is_dma_domain(domain))
        return NV_OK;

    // Only run if ATS is enabled. Otherwise the CE doesn't get response on
    // writing to unmapped location.
    if (!g_uvm_global.ats.enabled)
        return NV_OK;

    status = uvm_mem_alloc_sysmem_and_map_cpu_kernel(data_size, NULL, &sysmem);
    TEST_NV_CHECK_RET(status);

    status = uvm_mem_map_gpu_phys(sysmem, gpu);
    TEST_NV_CHECK_GOTO(status, done);

    cpu_ptr = uvm_mem_get_cpu_addr_kernel(sysmem);
    sysmem_dma_addr = uvm_mem_gpu_address_physical(sysmem, gpu, 0, data_size);

    // First memset: the mapping is live, so this must succeed.
    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset to IOMMU mapped sysmem");
    TEST_NV_CHECK_GOTO(status, done);

    gpu->parent->ce_hal->memset_8(&push, sysmem_dma_addr, 0, data_size);

    status = uvm_push_end_and_wait(&push);
    TEST_NV_CHECK_GOTO(status, done);

    // Check that we have zeroed the memory
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    // Unmap the buffer and try write again to the same address
    uvm_mem_unmap_gpu_phys(sysmem, gpu);

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Test memset after IOMMU unmap");
    TEST_NV_CHECK_GOTO(status, done);

    // Second memset: the mapping is gone, so the push must fail with an RC
    // error rather than succeed.
    gpu->parent->ce_hal->memset_4(&push, sysmem_dma_addr, 0xffffffff, data_size);
    status = uvm_push_end_and_wait(&push);
    TEST_CHECK_GOTO(status == NV_ERR_RC_ERROR, done);

    TEST_CHECK_GOTO(uvm_channel_get_status(push.channel) == NV_ERR_RC_ERROR, done);
    // The RC is expected, so consume the fatal error recorded globally.
    TEST_CHECK_GOTO(uvm_global_reset_fatal_error() == NV_ERR_RC_ERROR, done);

    // Check that writes after unmap did not succeed
    for (i = 0; i < data_size; ++i)
        TEST_CHECK_GOTO(cpu_ptr[i] == 0, done);

    status = NV_OK;

done:
    uvm_mem_free(sysmem);
#endif

    return status;
}
// Run the IOMMU RC test on every GPU registered in the VA space.
//
// Each GPU's channel manager is destroyed and re-created after the RC test,
// which requires this process to be the only retainer of the GPU. Fatal error
// asserts are suppressed around the test because the RC error is intentional.
static NV_STATUS test_iommu(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    for_each_va_space_gpu(gpu, va_space) {
        NV_STATUS test_status, create_status;

        // The GPU channel manager is destroyed and then re-created after
        // testing ATS RC fault, so this test requires exclusive access to the GPU.
        TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);

        // The induced RC is expected; don't assert on the fatal error it sets.
        g_uvm_global.disable_fatal_error_assert = true;
        test_status = uvm_test_iommu_rc_for_gpu(gpu);
        g_uvm_global.disable_fatal_error_assert = false;

        // Re-create the channel manager even if the test failed, so the GPU is
        // left in a usable state; report both statuses afterwards.
        uvm_channel_manager_destroy(gpu->channel_manager);
        create_status = uvm_channel_manager_create(gpu, &gpu->channel_manager);

        TEST_NV_CHECK_RET(test_status);
        TEST_NV_CHECK_RET(create_status);
    }

    return NV_OK;
}
typedef struct
{
uvm_push_t push;
@ -403,7 +508,7 @@ static uvm_channel_type_t random_ce_channel_type_except(uvm_test_rng_t *rng, uvm
static uvm_channel_type_t gpu_random_internal_ce_channel_type(uvm_gpu_t *gpu, uvm_test_rng_t *rng)
{
if (uvm_gpu_uses_proxy_channel_pool(gpu))
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent))
return random_ce_channel_type_except(rng, uvm_channel_proxy_channel_type());
return random_ce_channel_type(rng);
@ -693,9 +798,7 @@ NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
NvU32 i;
NvU32 num_pushes;
gpu = uvm_va_space_find_first_gpu(va_space);
if (!uvm_conf_computing_mode_enabled(gpu))
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
uvm_thread_context_lock_disable_tracking();
@ -746,6 +849,101 @@ error:
return status;
}
// Exercise forced IV rotation on one channel of every pool and verify that
// the remaining-IV counters move (or stay put) as expected per channel type.
//
// Only meaningful with Confidential Computing enabled; otherwise a no-op.
NV_STATUS test_channel_iv_rotation(uvm_va_space_t *va_space)
{
    uvm_gpu_t *gpu;

    if (!g_uvm_global.conf_computing_enabled)
        return NV_OK;

    for_each_va_space_gpu(gpu, va_space) {
        uvm_channel_pool_t *pool;

        uvm_for_each_pool(pool, gpu->channel_manager) {
            NvU64 before_rotation_enc, before_rotation_dec, after_rotation_enc, after_rotation_dec;
            NV_STATUS status = NV_OK;

            // Check one (the first) channel per pool
            uvm_channel_t *channel = pool->channels;

            // Create a dummy encrypt/decrypt push to use few IVs.
            // SEC2 used encrypt during initialization, no need to use a dummy
            // push.
            if (!uvm_channel_is_sec2(channel)) {
                uvm_push_t push;
                size_t data_size;
                uvm_conf_computing_dma_buffer_t *cipher_text;
                void *cipher_cpu_va, *plain_cpu_va, *tag_cpu_va;
                uvm_gpu_address_t cipher_gpu_address, plain_gpu_address, tag_gpu_address;
                // LCIC channels cannot be pushed to directly; route the dummy
                // push through the paired WLC channel instead.
                uvm_channel_t *work_channel = uvm_channel_is_lcic(channel) ? uvm_channel_lcic_get_paired_wlc(channel) : channel;

                // Any small payload works; encrypt the status variable itself.
                plain_cpu_va = &status;
                data_size = sizeof(status);

                TEST_NV_CHECK_RET(uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool,
                                                                      &cipher_text,
                                                                      NULL));

                cipher_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->alloc);
                tag_cpu_va = uvm_mem_get_cpu_addr_kernel(cipher_text->auth_tag);

                cipher_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->alloc, gpu);
                tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(cipher_text->auth_tag, gpu);

                TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(work_channel, &push, "Dummy push for IV rotation"), free);

                (void)uvm_push_get_single_inline_buffer(&push,
                                                        data_size,
                                                        UVM_CONF_COMPUTING_BUF_ALIGNMENT,
                                                        &plain_gpu_address);

                // CPU encrypt followed by GPU decrypt consumes one encrypt IV.
                uvm_conf_computing_cpu_encrypt(work_channel, cipher_cpu_va, plain_cpu_va, NULL, data_size, tag_cpu_va);
                gpu->parent->ce_hal->decrypt(&push, plain_gpu_address, cipher_gpu_address, data_size, tag_gpu_address);

                TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), free);

free:
                uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, cipher_text, NULL);
                if (status != NV_OK)
                    return status;
            }

            // Reserve a channel to hold the push lock during rotation
            if (!uvm_channel_is_lcic(channel))
                TEST_NV_CHECK_RET(uvm_channel_reserve(channel, 1));

            uvm_conf_computing_query_message_pools(channel, &before_rotation_enc, &before_rotation_dec);
            // Force rotation regardless of the remaining-IV limit (-1).
            TEST_NV_CHECK_GOTO(uvm_conf_computing_rotate_channel_ivs_below_limit(channel, -1, true), release);
            uvm_conf_computing_query_message_pools(channel, &after_rotation_enc, &after_rotation_dec);

release:
            if (!uvm_channel_is_lcic(channel))
                uvm_channel_release(channel, 1);
            if (status != NV_OK)
                return status;

            // All channels except SEC2 used at least a single IV to release tracking.
            // SEC2 doesn't support decrypt direction.
            if (uvm_channel_is_sec2(channel))
                TEST_CHECK_RET(before_rotation_dec == after_rotation_dec);
            else
                TEST_CHECK_RET(before_rotation_dec < after_rotation_dec);

            // All channels used one CPU encrypt/GPU decrypt, either during
            // initialization or in the push above, with the exception of LCIC.
            // LCIC is used in tandem with WLC, but it never uses CPU encrypt/
            // GPU decrypt ops.
            if (uvm_channel_is_lcic(channel))
                TEST_CHECK_RET(before_rotation_enc == after_rotation_enc);
            else
                TEST_CHECK_RET(before_rotation_enc < after_rotation_enc);
        }
    }

    return NV_OK;
}
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
@ -845,11 +1043,9 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
NvU64 entry;
uvm_push_t push;
gpu = uvm_va_space_find_first_gpu(va_space);
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu))
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
for_each_va_space_gpu(gpu, va_space) {
@ -924,7 +1120,7 @@ static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space
uvm_channel_manager_t *manager;
uvm_channel_pool_t *pool;
if (!uvm_gpu_has_pushbuffer_segments(gpu))
if (!uvm_parent_gpu_needs_pushbuffer_segments(gpu->parent))
continue;
// The GPU channel manager pushbuffer is destroyed and then re-created
@ -999,6 +1195,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK)
goto done;
status = test_channel_iv_rotation(va_space);
if (status != NV_OK)
goto done;
// The following tests have side effects, they reset the GPU's
// channel_manager.
status = test_channel_pushbuffer_extension_base(va_space);
@ -1019,6 +1219,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
goto done;
}
status = test_iommu(va_space);
if (status != NV_OK)
goto done;
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
@ -1034,23 +1238,22 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
if (params->iterations == 0 || params->num_streams == 0)
return NV_ERR_INVALID_PARAMETER;
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
// TODO: Bug 1764963: Rework the test to not rely on the global lock as that
// serializes all the threads calling this at the same time.
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_read_rm(va_space);
// TODO: Bug 3839176: the test is waived on Confidential Computing because
// it assumes that GPU can access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(uvm_va_space_find_first_gpu(va_space)))
goto done;
status = stress_test_all_gpus_in_va(va_space,
params->num_streams,
params->iterations,
params->seed,
params->verbose);
done:
uvm_va_space_up_read_rm(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);

View File

@ -21,8 +21,8 @@
*******************************************************************************/
#ifndef _UVM_COMMON_H
#define _UVM_COMMON_H
#ifndef __UVM_COMMON_H__
#define __UVM_COMMON_H__
#ifdef DEBUG
#define UVM_IS_DEBUG() 1
@ -204,13 +204,6 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_RELEASE(expr) _UVM_ASSERT_MSG_RELEASE(expr, #expr, "\n")
// Provide a short form of UUID's, typically for use in debug printing:
#define ABBREV_UUID(uuid) (unsigned)(uuid)
static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
{
return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
}
#define UVM_SIZE_1KB (1024ULL)
#define UVM_SIZE_1MB (1024 * UVM_SIZE_1KB)
#define UVM_SIZE_1GB (1024 * UVM_SIZE_1MB)
@ -409,4 +402,40 @@ static inline void uvm_touch_page(struct page *page)
// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);
#endif /* _UVM_COMMON_H */
// Whether CPU virtual addresses on this platform use the canonical form
// (the unimplemented upper bits are a sign extension of the top implemented
// bit — see uvm_get_unaddressable_range()). PPC64LE is the exception.
static bool uvm_platform_uses_canonical_form_address(void)
{
    return !NVCPU_IS_PPC64LE;
}
// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
//
// fls64(TASK_SIZE - 1) yields the number of bits needed to address user space;
// the +1 accounts for the upper half of the VA space.
// NOTE(review): assumes a split user/upper-half address layout on all
// supported CPU platforms — confirm if a new platform is added.
static NvU32 uvm_cpu_num_va_bits(void)
{
    return fls64(TASK_SIZE - 1) + 1;
}
// Return the unaddressable range in a num_va_bits-wide VA space, [first, outer)
//
// On platforms using canonical-form addresses the hole starts at the first
// address whose top implemented bit is set but not sign-extended, and ends at
// the lowest fully sign-extended address. On other platforms (see
// uvm_platform_uses_canonical_form_address()) everything at or above
// 1 << num_va_bits is unaddressable.
static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *outer)
{
    UVM_ASSERT(num_va_bits < 64);
    UVM_ASSERT(first);
    UVM_ASSERT(outer);

    if (uvm_platform_uses_canonical_form_address()) {
        *first = 1ULL << (num_va_bits - 1);

        // Arithmetic right shift of the sign bit replicates it into the top
        // (64 - num_va_bits) bits, producing the lowest canonical address of
        // the upper half. NOTE(review): relies on implementation-defined
        // arithmetic shift of negative signed values (universal on supported
        // compilers).
        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
    }
    else {
        // Fixed literal suffix casing (~0Ull -> ~0ULL) for consistency with
        // the 1ULL literals above; the value is unchanged.
        *outer = ~0ULL;
        *first = 1ULL << num_va_bits;
    }
}
// Return the unaddressable range of the CPU's VA space, [first, outer)
static void uvm_cpu_get_unaddressable_range(NvU64 *first, NvU64 *outer)
{
    // Note: previously written as "return uvm_get_unaddressable_range(...);".
    // A return statement with an expression is not allowed in a function
    // returning void (C99 6.8.6.4); call and fall off the end instead.
    uvm_get_unaddressable_range(uvm_cpu_num_va_bits(), first, outer);
}
#endif /* __UVM_COMMON_H__ */

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,44 +33,55 @@
#include "nv_uvm_interface.h"
#include "uvm_va_block.h"
// The maximum number of secure operations per push is:
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN 2342lu
// Channels use 32-bit counters so the value after rotation is 0xffffffff.
// Setting the limit to this value (or higher) will result in rotation
// on every check. However, pre-emptive rotation when submitting control
// GPFIFO entries relies on the fact that multiple successive checks after
// rotation do not trigger more rotations if no IV was used in between.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX 0xfffffffelu
// Attempt rotation when two billion IVs are left. The IV rotation call can
// fail if the necessary locks are not available, so multiple attempts may be
// needed for IV rotation to succeed.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT (1lu << 31)
// Start rotating after 500 encryption/decryptions when running tests.
#define UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS ((1lu << 32) - 500lu)
static ulong uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
module_param(uvm_conf_computing_channel_iv_rotation_limit, ulong, S_IRUGO);
// Return the Confidential Compute mode RM reported for this parent GPU.
static UvmGpuConfComputeMode uvm_conf_computing_get_mode(const uvm_parent_gpu_t *parent)
{
    return parent->rm_info.gpuConfComputeCaps.mode;
}
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent)
{
return uvm_conf_computing_get_mode(parent) != UVM_GPU_CONF_COMPUTE_MODE_NONE;
}
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu)
{
return uvm_conf_computing_mode_enabled_parent(gpu->parent);
}
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu)
{
return uvm_conf_computing_get_mode(gpu->parent) == UVM_GPU_CONF_COMPUTE_MODE_HCC;
}
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent)
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent)
{
UvmGpuConfComputeMode cc, sys_cc;
uvm_gpu_t *first;
uvm_parent_gpu_t *other_parent;
UvmGpuConfComputeMode parent_mode = uvm_conf_computing_get_mode(parent);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
// TODO: Bug 2844714: since we have no routine to traverse parent GPUs,
// find first child GPU and get its parent.
first = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);
if (!first)
return NV_OK;
// The Confidential Computing state of the GPU should match that of the
// system.
UVM_ASSERT((parent_mode != UVM_GPU_CONF_COMPUTE_MODE_NONE) == g_uvm_global.conf_computing_enabled);
sys_cc = uvm_conf_computing_get_mode(first->parent);
cc = uvm_conf_computing_get_mode(parent);
return cc == sys_cc ? NV_OK : NV_ERR_NOT_SUPPORTED;
// All GPUs derive Confidential Computing status from their parent. By
// current policy all parent GPUs have identical Confidential Computing
// status.
for_each_parent_gpu(other_parent)
UVM_ASSERT(parent_mode == uvm_conf_computing_get_mode(other_parent));
}
static void dma_buffer_destroy_locked(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool,
@ -184,15 +195,11 @@ static void dma_buffer_pool_add(uvm_conf_computing_dma_buffer_pool_t *dma_buffer
static NV_STATUS conf_computing_dma_buffer_pool_init(uvm_conf_computing_dma_buffer_pool_t *dma_buffer_pool)
{
size_t i;
uvm_gpu_t *gpu;
size_t num_dma_buffers = 32;
NV_STATUS status = NV_OK;
UVM_ASSERT(dma_buffer_pool->num_dma_buffers == 0);
gpu = dma_buffer_pool_to_gpu(dma_buffer_pool);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
INIT_LIST_HEAD(&dma_buffer_pool->free_dma_buffers);
uvm_mutex_init(&dma_buffer_pool->lock, UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
@ -349,7 +356,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
{
NV_STATUS status;
if (!uvm_conf_computing_mode_enabled(gpu))
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
status = conf_computing_dma_buffer_pool_init(&gpu->conf_computing.dma_buffer_pool);
@ -360,6 +367,20 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
if (status != NV_OK)
goto error;
if (uvm_enable_builtin_tests && uvm_conf_computing_channel_iv_rotation_limit == UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT)
uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_TESTS;
if (uvm_conf_computing_channel_iv_rotation_limit < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
uvm_conf_computing_channel_iv_rotation_limit > UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX) {
UVM_ERR_PRINT("Value of uvm_conf_computing_channel_iv_rotation_limit: %lu is outside of the safe "
"range: <%lu, %lu>. Using the default value instead (%lu)\n",
uvm_conf_computing_channel_iv_rotation_limit,
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN,
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MAX,
UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT);
uvm_conf_computing_channel_iv_rotation_limit = UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_DEFAULT;
}
return NV_OK;
error:
@ -381,9 +402,8 @@ void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
}
@ -395,9 +415,8 @@ void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
}
@ -421,8 +440,8 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
(NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// nvUvmInterfaceCslEncrypt fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// IV rotation is done preemptively as needed, so the above
// call cannot return failure.
UVM_ASSERT(status == NV_OK);
}
@ -435,6 +454,16 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
{
NV_STATUS status;
// The CSL context associated with a channel can be used by multiple
// threads. The IV sequence is thus guaranteed only while the channel is
// "locked for push". The channel/push lock is released in
// "uvm_channel_end_push", and at that time the GPU encryption operations
// have not executed, yet. Therefore the caller has to use
// "uvm_conf_computing_log_gpu_encryption" to explicitly store IVs needed
// to perform CPU decryption and pass those IVs to this function after the
// push that did the encryption completes.
UVM_ASSERT(src_iv);
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
size,
@ -463,7 +492,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
// decryption is invoked as part of fault servicing.
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceCslDecrypt(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
parent_gpu->fault_buffer_hal->entry_size(parent_gpu),
@ -475,7 +504,9 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
(const NvU8 *) auth_tag_buffer);
if (status != NV_OK)
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n", nvstatusToString(status), parent_gpu->name);
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
}
@ -487,7 +518,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu,
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceCslIncrementIv(&parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx,
UVM_CSL_OPERATION_DECRYPT,
@ -496,3 +527,101 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu,
UVM_ASSERT(status == NV_OK);
}
// Query RM for the number of CSL messages that can still be processed in
// each direction (CPU encrypt and CPU decrypt) before the channel's IVs
// must be rotated.
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
                                            NvU64 *remaining_encryptions,
                                            NvU64 *remaining_decryptions)
{
    NV_STATUS status;

    UVM_ASSERT(channel);
    UVM_ASSERT(remaining_encryptions);
    UVM_ASSERT(remaining_decryptions);

    uvm_mutex_lock(&channel->csl.ctx_lock);

    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, remaining_encryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_encryptions <= NV_U32_MAX);

    // LCIC channels never use CPU encrypt/GPU decrypt, so their encryption
    // pool is expected to be untouched.
    if (uvm_channel_is_lcic(channel))
        UVM_ASSERT(*remaining_encryptions == NV_U32_MAX);

    status = nvUvmInterfaceCslQueryMessagePool(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, remaining_decryptions);
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(*remaining_decryptions <= NV_U32_MAX);

    uvm_mutex_unlock(&channel->csl.ctx_lock);
}
// Rotate the channel's encryption and/or decryption IVs when fewer than
// 'limit' messages remain in the corresponding CSL message pool.
//
// The caller must guarantee that no concurrent pushes can be created: the
// channel is either locked for push, or is an LCIC channel whose WLC
// machinery is already up.
static NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit_internal(uvm_channel_t *channel, NvU64 limit)
{
    NV_STATUS status = NV_OK;
    NvU64 encryptions_left;
    NvU64 decryptions_left;
    bool need_encrypt_rotation;
    bool need_decrypt_rotation;

    UVM_ASSERT(uvm_channel_is_locked_for_push(channel) ||
               (uvm_channel_is_lcic(channel) && uvm_channel_manager_is_wlc_ready(channel->pool->manager)));

    uvm_conf_computing_query_message_pools(channel, &encryptions_left, &decryptions_left);

    // Only CE channels support GPU encrypt/CPU decrypt, so the decryption
    // counter is ignored for SEC2 even though RM reports _some_ decrementing
    // value for it.
    need_decrypt_rotation = uvm_channel_is_ce(channel) && (decryptions_left <= limit);
    need_encrypt_rotation = (encryptions_left <= limit);

    if (!need_encrypt_rotation && !need_decrypt_rotation)
        return NV_OK;

    // Wait for all in-flight pushes before rotating. The caller guarantees no
    // new pushes are created concurrently, e.g. by only rotating after the
    // channel is locked for push.
    status = uvm_channel_wait(channel);
    if (status != NV_OK)
        return status;

    uvm_mutex_lock(&channel->csl.ctx_lock);

    if (need_encrypt_rotation)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT);

    if (status == NV_OK && need_decrypt_rotation)
        status = nvUvmInterfaceCslRotateIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT);

    uvm_mutex_unlock(&channel->csl.ctx_lock);

    // Promote "busy" to an out-of-resources error once the available IVs have
    // dropped below the minimum safe level.
    if (status == NV_ERR_STATE_IN_USE &&
        (encryptions_left < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN ||
         decryptions_left < UVM_CONF_COMPUTING_IV_REMAINING_LIMIT_MIN))
        return NV_ERR_INSUFFICIENT_RESOURCES;

    return status;
}
// Rotate the channel's IVs if fewer than 'limit' messages remain in either
// direction.
//
// When retry_if_busy is true, rotation is re-attempted until the CSL context
// stops reporting NV_ERR_STATE_IN_USE. When it is false, a "busy" result is
// hidden (NV_OK is returned) and rotation is simply retried at the next
// opportunity.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy)
{
    NV_STATUS status;

    for (;;) {
        status = uvm_conf_computing_rotate_channel_ivs_below_limit_internal(channel, limit);

        if (status != NV_ERR_STATE_IN_USE)
            return status;

        if (!retry_if_busy)
            return NV_OK;
    }
}
// Rotate the channel's IVs if the remaining message count in either direction
// has dropped to or below uvm_conf_computing_channel_iv_rotation_limit. A busy
// CSL context is not retried here; the rotation is re-attempted at the next
// opportunity.
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, false);
}
// Same as uvm_conf_computing_maybe_rotate_channel_ivs(), but keeps retrying
// while the CSL context reports busy (NV_ERR_STATE_IN_USE).
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel)
{
    return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
}

View File

@ -60,12 +60,8 @@
// UVM_METHOD_SIZE * 2 * 10 = 80.
#define UVM_CONF_COMPUTING_SIGN_BUF_MAX_SIZE 80
// All GPUs derive confidential computing status from their parent.
// By current policy all parent GPUs have identical confidential
// computing status.
NV_STATUS uvm_conf_computing_init_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled_parent(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_enabled(const uvm_gpu_t *gpu);
void uvm_conf_computing_check_parent_gpu(const uvm_parent_gpu_t *parent);
bool uvm_conf_computing_mode_is_hcc(const uvm_gpu_t *gpu);
typedef struct
@ -201,4 +197,21 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
//
// Locking: this function must be invoked while holding the replayable ISR lock.
void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu, NvU64 increment);
// Query the number of remaining messages before IV needs to be rotated.
void uvm_conf_computing_query_message_pools(uvm_channel_t *channel,
NvU64 *remaining_encryptions,
NvU64 *remaining_decryptions);
// Check if there are more than uvm_conf_computing_channel_iv_rotation_limit
// messages available in the channel and try to rotate if not.
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs(uvm_channel_t *channel);
// Check if there are more than uvm_conf_computing_channel_iv_rotation_limit
// messages available in the channel and rotate if not.
NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *channel);
// Check if there are fewer than 'limit' messages available in either direction
// and rotate if not.
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
#endif // __UVM_CONF_COMPUTING_H__

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -34,23 +34,24 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_gpu_t *gpu;
uvm_global_processor_mask_t retained_gpus;
uvm_processor_mask_t *retained_gpus;
NvU64 i;
uvm_global_processor_mask_zero(&retained_gpus);
retained_gpus = uvm_processor_mask_cache_alloc();
if (!retained_gpus)
return NV_ERR_NO_MEMORY;
uvm_processor_mask_zero(retained_gpus);
uvm_va_space_down_read(va_space);
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->replayable_faults_supported)
uvm_global_processor_mask_set(&retained_gpus, gpu->global_id);
}
uvm_processor_mask_and(retained_gpus, &va_space->faultable_processors, &va_space->registered_gpus);
uvm_global_mask_retain(&retained_gpus);
uvm_global_gpu_retain(retained_gpus);
uvm_va_space_up_read(va_space);
if (uvm_global_processor_mask_empty(&retained_gpus))
if (uvm_processor_mask_empty(retained_gpus))
return NV_ERR_INVALID_DEVICE;
for (i = 0; i < params->iterations; i++) {
@ -59,11 +60,12 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
break;
}
for_each_global_gpu_in_mask(gpu, &retained_gpus)
for_each_gpu_in_mask(gpu, retained_gpus)
TEST_CHECK_GOTO(uvm_gpu_fault_buffer_flush(gpu) == NV_OK, out);
}
out:
uvm_global_mask_release(&retained_gpus);
uvm_global_gpu_release(retained_gpus);
uvm_processor_mask_cache_free(retained_gpus);
return status;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVidia Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -151,6 +151,22 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
return NV_OK;
}
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Until the UVM user level API has been updated to use the GI UUID, replace a
// GI UUID in mem_info with the UUID of the owning physical GPU.
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
{
    uvm_gpu_t *gpu;

    for_each_va_space_gpu(gpu, va_space) {
        if (!uvm_uuid_eq(&gpu->uuid, &mem_info->uuid))
            continue;

        mem_info->uuid = gpu->parent->uuid;
        return;
    }
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
@ -168,7 +184,8 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
client = params->hClient;
memory = params->hMemory;
// Note: This check is safe as single GPU test does not run on SLI enabled devices.
// Note: This check is safe as single GPU test does not run on SLI enabled
// devices.
memory_mapping_gpu = uvm_va_space_get_gpu_by_uuid_with_gpu_va_space(va_space, &params->gpu_uuid);
if (!memory_mapping_gpu)
return NV_ERR_INVALID_DEVICE;
@ -180,7 +197,12 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
if (status != NV_OK)
return status;
TEST_CHECK_GOTO(uvm_processor_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
@ -287,6 +309,11 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,6 +27,7 @@
#include "uvm_gpu_replayable_faults.h"
#include "uvm_mem.h"
#include "uvm_perf_events.h"
#include "uvm_processors.h"
#include "uvm_procfs.h"
#include "uvm_thread_context.h"
#include "uvm_va_range.h"
@ -71,11 +72,6 @@ static void uvm_unregister_callbacks(void)
}
}
// Record whether the platform reports AMD SEV (Secure Encrypted
// Virtualization) as enabled. This is done once, during global
// initialization, so g_uvm_global.sev_enabled can be read afterwards without
// acquiring any locks.
static void sev_init(const UvmPlatformInfo *platform_info)
{
    g_uvm_global.sev_enabled = platform_info->sevEnabled;
}
NV_STATUS uvm_global_init(void)
{
NV_STATUS status;
@ -124,8 +120,13 @@ NV_STATUS uvm_global_init(void)
uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0;
g_uvm_global.conf_computing_enabled = platform_info.confComputingEnabled;
sev_init(&platform_info);
status = uvm_processor_mask_cache_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_processor_mask_cache_init() failed: %s\n", nvstatusToString(status));
goto error;
}
status = uvm_gpu_init();
if (status != NV_OK) {
@ -229,6 +230,7 @@ void uvm_global_exit(void)
uvm_mem_global_exit();
uvm_pmm_sysmem_exit();
uvm_gpu_exit();
uvm_processor_mask_cache_exit();
if (g_uvm_global.rm_session_handle != 0)
uvm_rm_locked_call_void(nvUvmInterfaceSessionDestroy(g_uvm_global.rm_session_handle));
@ -247,19 +249,19 @@ void uvm_global_exit(void)
// Signal to the top-half ISR whether calls from the RM's top-half ISR are to
// be completed without processing.
static void uvm_gpu_set_isr_suspended(uvm_gpu_t *gpu, bool is_suspended)
static void uvm_parent_gpu_set_isr_suspended(uvm_parent_gpu_t *parent_gpu, bool is_suspended)
{
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
gpu->parent->isr.is_suspended = is_suspended;
parent_gpu->isr.is_suspended = is_suspended;
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
static NV_STATUS uvm_suspend(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
// Upon entry into this function, the following is true:
@ -293,7 +295,7 @@ static NV_STATUS uvm_suspend(void)
// Though global_lock isn't held here, pm.lock indirectly prevents the
// addition and removal of GPUs, since these operations can currently
// only occur in response to ioctl() calls.
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
@ -314,7 +316,7 @@ static NV_STATUS uvm_suspend(void)
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_gpu_set_isr_suspended(gpu, true);
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
nv_kthread_q_flush(&gpu->parent->isr.bottom_half_q);
@ -347,7 +349,7 @@ NV_STATUS uvm_suspend_entry(void)
static NV_STATUS uvm_resume(void)
{
uvm_va_space_t *va_space = NULL;
uvm_global_gpu_id_t gpu_id;
uvm_gpu_id_t gpu_id;
uvm_gpu_t *gpu;
g_uvm_global.pm.is_suspended = false;
@ -366,14 +368,14 @@ static NV_STATUS uvm_resume(void)
uvm_mutex_unlock(&g_uvm_global.va_spaces.lock);
// pm.lock is held in lieu of global_lock to prevent GPU addition/removal
for_each_global_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
for_each_gpu_id_in_mask(gpu_id, &g_uvm_global.retained_gpus) {
gpu = uvm_gpu_get(gpu_id);
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_gpu_set_isr_suspended(gpu, false);
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
@ -431,35 +433,36 @@ NV_STATUS uvm_global_reset_fatal_error(void)
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
}
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask)
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, mask)
for_each_gpu_in_mask(gpu, mask)
uvm_gpu_retain(gpu);
}
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask)
void uvm_global_gpu_release(const uvm_processor_mask_t *mask)
{
uvm_global_gpu_id_t gpu_id;
uvm_gpu_id_t gpu_id;
if (uvm_global_processor_mask_empty(mask))
if (uvm_processor_mask_empty(mask))
return;
uvm_mutex_lock(&g_uvm_global.global_lock);
// Do not use for_each_global_gpu_in_mask as it reads the GPU state and it
// might get destroyed
for_each_global_gpu_id_in_mask(gpu_id, mask)
// Do not use for_each_gpu_in_mask as it reads the GPU state and it
// might get destroyed.
for_each_gpu_id_in_mask(gpu_id, mask)
uvm_gpu_release_locked(uvm_gpu_get(gpu_id));
uvm_mutex_unlock(&g_uvm_global.global_lock);
}
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus)
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
for_each_global_gpu_in_mask(gpu, gpus) {
for_each_gpu_in_mask(gpu, gpus) {
NV_STATUS status = uvm_gpu_check_ecc_error(gpu);
if (status != NV_OK)
return status;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -40,13 +40,13 @@ struct uvm_global_struct
// Note that GPUs are added to this mask as the last step of add_gpu() and
// removed from it as the first step of remove_gpu() implying that a GPU
// that's being initialized or deinitialized will not be in it.
uvm_global_processor_mask_t retained_gpus;
uvm_processor_mask_t retained_gpus;
// Array of the parent GPUs registered with UVM. Note that GPUs will have
// ids offset by 1 to accommodate the UVM_GLOBAL_ID_CPU so e.g.
// parent_gpus[0] will have GPU id = 1. A GPU entry is unused iff it does
// not exist (is a NULL pointer) in this table.
uvm_parent_gpu_t *parent_gpus[UVM_MAX_GPUS];
// ids offset by 1 to accommodate the UVM_ID_CPU so e.g., parent_gpus[0]
// will have GPU id = 1. A GPU entry is unused iff it does not exist
// (is a NULL pointer) in this table.
uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];
// A global RM session (RM client)
// Created on module load and destroyed on module unload
@ -143,11 +143,16 @@ struct uvm_global_struct
struct page *page;
} unload_state;
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
// enabled. This field is set once during global initialization
// (uvm_global_init), and can be read afterwards without acquiring any
// locks.
bool sev_enabled;
// True if the VM has AMD's SEV, or equivalent HW security extensions such
// as Intel's TDX, enabled. The flag is always false on the host.
//
// This value moves in tandem with that of Confidential Computing in the
// GPU(s) in all supported configurations, so it is used as a proxy for the
// Confidential Computing state.
//
// This field is set once during global initialization (uvm_global_init),
// and can be read afterwards without acquiring any locks.
bool conf_computing_enabled;
};
// Initialize global uvm state
@ -167,7 +172,7 @@ NV_STATUS uvm_resume_entry(void);
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
@ -181,7 +186,7 @@ static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
@ -196,41 +201,25 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
// retained at least one of the child GPUs.
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}
// Get a gpu by its global id.
// Get a gpu by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_global_gpu_id_t global_gpu_id)
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
uvm_parent_gpu_t *parent_gpu;
parent_gpu = g_uvm_global.parent_gpus[uvm_id_gpu_index_from_global_gpu_id(global_gpu_id)];
parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
if (!parent_gpu)
return NULL;
return parent_gpu->gpus[uvm_global_id_sub_processor_index(global_gpu_id)];
}
// Get a gpu by its processor id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get_by_processor_id(uvm_processor_id_t id)
{
uvm_global_gpu_id_t global_id = uvm_global_gpu_id_from_gpu_id(id);
uvm_gpu_t *gpu = uvm_gpu_get(global_id);
if (gpu)
UVM_ASSERT(!gpu->parent->smc.enabled);
return gpu;
return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}
static uvmGpuSessionHandle uvm_global_session_handle(void)
@ -287,56 +276,57 @@ static NV_STATUS uvm_global_get_status(void)
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
static uvm_gpu_t *uvm_global_processor_mask_find_first_gpu(const uvm_global_processor_mask_t *global_gpus)
static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
uvm_gpu_t *gpu;
uvm_global_gpu_id_t gpu_id = uvm_global_processor_mask_find_first_gpu_id(global_gpus);
uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
if (UVM_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// If there is valid GPU id in the mask, assert that the corresponding
// uvm_gpu_t is present. Otherwise it would stop a
// for_each_global_gpu_in_mask() loop pre-maturely. Today, this could only
// for_each_gpu_in_mask() loop pre-maturely. Today, this could only
// happen in remove_gpu() because the GPU being removed is deleted from the
// global table very early.
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
return gpu;
}
static uvm_gpu_t *__uvm_global_processor_mask_find_next_gpu(const uvm_global_processor_mask_t *global_gpus, uvm_gpu_t *gpu)
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
uvm_global_gpu_id_t gpu_id;
uvm_gpu_id_t gpu_id;
UVM_ASSERT(gpu);
gpu_id = uvm_global_processor_mask_find_next_id(global_gpus, uvm_global_gpu_id_next(gpu->global_id));
if (UVM_GLOBAL_ID_IS_INVALID(gpu_id))
gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
if (UVM_ID_IS_INVALID(gpu_id))
return NULL;
gpu = uvm_gpu_get(gpu_id);
// See comment in uvm_global_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_global_id_value(gpu_id));
// See comment in uvm_processor_mask_find_first_gpu().
UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));
return gpu;
}
// Helper to iterate over all GPUs in the input mask
#define for_each_global_gpu_in_mask(gpu, global_mask) \
for (gpu = uvm_global_processor_mask_find_first_gpu(global_mask); \
#define for_each_gpu_in_mask(gpu, mask) \
for (gpu = uvm_processor_mask_find_first_gpu(mask); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(global_mask, gpu))
gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
// Helper to iterate over all GPUs retained by the UVM driver (across all va spaces)
#define for_each_global_gpu(gpu) \
// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
#define for_each_gpu(gpu) \
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
gpu = uvm_global_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
gpu != NULL; \
gpu = __uvm_global_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
@ -344,7 +334,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
NvU32 i;
if (parent_gpu) {
NvU32 gpu_index = uvm_id_gpu_index(parent_gpu->id);
NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);
i = gpu_index + 1;
}
else {
@ -353,7 +343,7 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
parent_gpu = NULL;
while (i < UVM_MAX_GPUS) {
while (i < UVM_PARENT_ID_MAX_GPUS) {
if (g_uvm_global.parent_gpus[i]) {
parent_gpu = g_uvm_global.parent_gpus[i];
break;
@ -369,18 +359,18 @@ static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *paren
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
uvm_gpu_t *gpu = NULL;
uvm_global_gpu_id_t global_gpu_id;
uvm_gpu_id_t gpu_id;
NvU32 sub_processor_index;
NvU32 cur_sub_processor_index;
UVM_ASSERT(parent_gpu);
global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_global_id_sub_processor_index(cur_gpu->global_id) : -1;
gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
sub_processor_index = find_next_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS, cur_sub_processor_index + 1);
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
UVM_ASSERT(gpu != NULL);
}
@ -400,18 +390,18 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
(gpu) != NULL; \
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
// Helper which calls uvm_gpu_retain on each GPU in mask
void uvm_global_mask_retain(const uvm_global_processor_mask_t *mask);
// Helper which calls uvm_gpu_retain() on each GPU in mask.
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
void uvm_global_mask_release(const uvm_global_processor_mask_t *mask);
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
// Check for ECC errors for all GPUs in a mask
// Notably this check cannot be performed where it's not safe to call into RM.
NV_STATUS uvm_global_mask_check_ecc_error(uvm_global_processor_mask_t *gpus);
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);

File diff suppressed because it is too large Load Diff

View File

@ -57,14 +57,16 @@
typedef struct
{
// Number of faults from this uTLB that have been fetched but have not been serviced yet
// Number of faults from this uTLB that have been fetched but have not been
// serviced yet.
NvU32 num_pending_faults;
// Whether the uTLB contains fatal faults
bool has_fatal_faults;
// We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
// the uTLB in lockdown mode and no new translations are accepted
// We have issued a replay of type START_ACK_ALL while containing fatal
// faults. This puts the uTLB in lockdown mode and no new translations are
// accepted.
bool in_lockdown;
// We have issued a cancel on this uTLB
@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
struct list_head service_context_list;
// A mask of GPUs that need to be checked for ECC errors before the CPU
// fault handler returns, but after the VA space lock has been unlocked to
// avoid the RM/UVM VA space lock deadlocks.
// fault handler returns, but after the VA space lock has been unlocked
// to avoid the RM/UVM VA space lock deadlocks.
uvm_processor_mask_t gpus_to_check_for_ecc;
// This is set to throttle page fault thrashing.
@ -160,9 +162,9 @@ struct uvm_service_block_context_struct
struct
{
// Per-processor mask with the pages that will be resident after servicing.
// We need one mask per processor because we may coalesce faults that
// trigger migrations to different processors.
// Per-processor mask with the pages that will be resident after
// servicing. We need one mask per processor because we may coalesce
// faults that trigger migrations to different processors.
uvm_page_mask_t new_residency;
} per_processor_masks[UVM_ID_MAX_PROCESSORS];
@ -179,23 +181,28 @@ struct uvm_service_block_context_struct
typedef struct
{
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. Used for batching ATS faults in a vma.
// VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t read_fault_mask;
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. Used for batching ATS faults in a vma.
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
// counter service requests.
uvm_page_mask_t write_fault_mask;
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used to return ATS fault status.
// of a SAM VMA. Used to return ATS fault status. This is unused for access
// counter service requests.
uvm_page_mask_t faults_serviced_mask;
// Mask of successfully serviced read faults on pages in write_fault_mask.
// This is unused for access counter service requests.
uvm_page_mask_t reads_serviced_mask;
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
// SAM VMA. This is used as input to the prefetcher.
uvm_page_mask_t faulted_mask;
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
// VMA. This is used as input for access counter service requests and output
// of fault service requests.
uvm_page_mask_t accessed_mask;
// Client type of the service requestor.
uvm_fault_client_type_t client_type;
@ -294,11 +301,8 @@ struct uvm_fault_service_batch_context_struct
struct uvm_ats_fault_invalidate_struct
{
// Whether the TLB batch contains any information
bool write_faults_in_batch;
// Batch of TLB entries to be invalidated
uvm_tlb_batch_t write_faults_tlb_batch;
bool tlb_batch_pending;
uvm_tlb_batch_t tlb_batch;
};
typedef struct
@ -443,20 +447,9 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in the
// batch report the same instance_ptr
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Scratch space, used to generate artificial physically addressed notifications.
// Virtual address notifications are always aligned to 64k. This means up to 16
// different physical locations could have been accessed to trigger one notification.
// The sub-granularity mask can correspond to any of them.
struct
{
uvm_processor_id_t resident_processors[16];
uvm_gpu_phys_address_t phys_addresses[16];
uvm_access_counter_buffer_entry_t phys_entry;
} scratch;
} virt;
struct
@ -467,8 +460,8 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in the
// batch report the same aperture
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
@ -478,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
// Structure used to coalesce access counter servicing in a VA block
uvm_service_block_context_t block_service_context;
// Structure used to service access counter migrations in an ATS block.
uvm_ats_fault_context_t ats_context;
// Unique id (per-GPU) generated for tools events recording
NvU32 batch_id;
};
@ -610,10 +606,22 @@ typedef enum
UVM_GPU_PEER_COPY_MODE_COUNT
} uvm_gpu_peer_copy_mode_t;
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
// partitions within the parent. The parent GPU and partition GPU have
// different "id" and "uuid".
struct uvm_gpu_struct
{
uvm_parent_gpu_t *parent;
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
// except that some paths retain the GPU temporarily without a VA space.
@ -632,13 +640,9 @@ struct uvm_gpu_struct
// user can create a lot of va spaces and register the gpu with them).
atomic64_t retained_count;
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
// of the parent's id.
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS).
uvm_gpu_id_t id;
// A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
uvm_global_gpu_id_t global_id;
// Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
NvU64 magic;
@ -664,8 +668,8 @@ struct uvm_gpu_struct
struct
{
// Big page size used by the internal UVM VA space
// Notably it may be different than the big page size used by a user's VA
// space in general.
// Notably it may be different than the big page size used by a user's
// VA space in general.
NvU32 internal_size;
} big_page;
@ -691,8 +695,8 @@ struct uvm_gpu_struct
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so that
// it can be safely accessed from the access counters bottom half
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
} peer_info;
@ -852,6 +856,11 @@ struct uvm_gpu_struct
bool uvm_test_force_upper_pushbuffer_segment;
};
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
// partitions within the parent. The parent GPU and partition GPU have
// different "id" and "uuid".
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
@ -864,11 +873,11 @@ struct uvm_parent_gpu_struct
// The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
NvU32 num_retained_gpus;
uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
// The gpu's uuid
NvProcessorUuid uuid;
@ -880,8 +889,8 @@ struct uvm_parent_gpu_struct
// hardware classes, etc.).
UvmGpuInfo rm_info;
// A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
uvm_gpu_id_t id;
// A unique uvm gpu id in range [1, UVM_PARENT_ID_MAX_PROCESSORS)
uvm_parent_gpu_id_t id;
// Reference to the Linux PCI device
//
@ -916,12 +925,13 @@ struct uvm_parent_gpu_struct
// dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
// referencing sysmem from the GPU, dma_addressable_start should be
// subtracted from the physical address. The DMA mapping helpers like
// uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
// uvm_parent_gpu_map_cpu_pages() and uvm_parent_gpu_dma_alloc_page() take
// care of that.
NvU64 dma_addressable_start;
NvU64 dma_addressable_limit;
// Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
// sysmem pages, used for leak detection.
// Total size (in bytes) of physically mapped (with
// uvm_parent_gpu_map_cpu_pages) sysmem pages, used for leak detection.
atomic64_t mapped_cpu_pages_size;
// Hardware Abstraction Layer
@ -940,7 +950,11 @@ struct uvm_parent_gpu_struct
// Virtualization mode of the GPU.
UVM_VIRT_MODE virt_mode;
// Whether the GPU can trigger faults on prefetch instructions
// Pascal+ GPUs can trigger faults on prefetch instructions. If false, this
// feature must be disabled at all times in GPUs of the given architecture.
// If true, the feature can be toggled at will by SW.
//
// The field should not be used unless the GPU supports replayable faults.
bool prefetch_fault_supported;
// Number of membars required to flush out HSHUB following a TLB invalidate
@ -955,6 +969,11 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@ -981,6 +1000,10 @@ struct uvm_parent_gpu_struct
bool plc_supported;
// If true, page_tree initialization pre-populates no_ats_ranges. It only
// affects ATS systems.
bool no_ats_range_required;
// Parameters used by the TLB batching API
struct
{
@ -1052,14 +1075,16 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if supports_access_counters is set to true
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@ -1109,7 +1134,7 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;
// This is set to true if the GPU belongs to an SLI group. Else, set to false.
// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;
struct
@ -1136,8 +1161,8 @@ struct uvm_parent_gpu_struct
// environment, rather than using the peer-id field of the PTE (which can
// only address 8 gpus), all gpus are assigned a 47-bit physical address
// space by the fabric manager. Any physical address access to these
// physical address spaces are routed through the switch to the corresponding
// peer.
// physical address spaces are routed through the switch to the
// corresponding peer.
struct
{
bool is_nvswitch_connected;
@ -1175,9 +1200,14 @@ struct uvm_parent_gpu_struct
} smmu_war;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
{
return parent_gpu->name;
}
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
{
return gpu->parent->name;
return gpu->name;
}
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
@ -1362,7 +1392,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
// They must not be the same gpu.
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
// Get the processor id accessible by the given GPU for the given physical address
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the P2P capabilities between the gpus with the given indexes
@ -1407,10 +1438,11 @@ static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// mapping covering the passed address, has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
{
UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
UVM_ASSERT(uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent))
UVM_ASSERT(gpu->static_flat_mapping.ready);
return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
@ -1422,12 +1454,12 @@ static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu
//
// The actual GPU mapping only exists if a linear mapping covering the passed
// address has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
static uvm_gpu_address_t uvm_parent_gpu_address_virtual_from_sysmem_phys(uvm_parent_gpu_t *parent_gpu, NvU64 pa)
{
UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));
UVM_ASSERT(uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(parent_gpu));
UVM_ASSERT(pa <= (parent_gpu->dma_addressable_limit - parent_gpu->dma_addressable_start));
return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
return uvm_gpu_address_virtual(parent_gpu->flat_sysmem_va_base + pa);
}
// Given a GPU or CPU physical address (not peer), retrieve an address suitable
@ -1437,11 +1469,12 @@ static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_addre
UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);
if (phys_addr.aperture == UVM_APERTURE_VID) {
if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
if (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent))
return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
}
else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
else if (uvm_mmu_parent_gpu_needs_dynamic_sysmem_mapping(gpu->parent)) {
return uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, phys_addr.address);
}
return uvm_gpu_address_from_phys(phys_addr);
@ -1459,9 +1492,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
// Check for ECC errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to do.
// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
// it's required to call uvm_gpu_check_ecc_error() to be sure.
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
// Map size bytes of contiguous sysmem on the GPU for physical access
@ -1470,19 +1503,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
NV_STATUS uvm_parent_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);
// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
// Unmap num_pages pages previously mapped with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_parent_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
return uvm_parent_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
}
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
static void uvm_parent_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
{
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
uvm_parent_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}
// Allocate and map a page of system DMA memory on the GPU for physical access
@ -1491,13 +1524,13 @@ static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_addre
// - the address of the page that can be used to access them on
// the GPU in the dma_address_out parameter.
// - the address of allocated memory in CPU virtual address space.
void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
void *uvm_parent_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
gfp_t gfp_flags,
NvU64 *dma_address_out);
// Unmap and free size bytes of contiguous sysmem DMA previously allocated
// with uvm_gpu_map_cpu_pages().
void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// with uvm_parent_gpu_map_cpu_pages().
void uvm_parent_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
// Returns whether the given range is within the GPU's addressable VA ranges.
// It requires the input 'addr' to be in canonical form for platforms compliant
@ -1518,6 +1551,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
bool uvm_platform_uses_canonical_form_address(void);
// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@ -1527,47 +1562,49 @@ static bool uvm_parent_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
static bool uvm_parent_gpu_needs_pushbuffer_segments(uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->max_host_va > (1ull << 40);
return parent_gpu->max_host_va > (1ull << 40);
}
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
static bool uvm_parent_gpu_supports_eviction(uvm_parent_gpu_t *parent_gpu)
{
// Eviction is supported only if the GPU supports replayable faults
return gpu->parent->replayable_faults_supported;
return parent_gpu->replayable_faults_supported;
}
static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov_heavy(const uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
}
static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov_standard(const uvm_parent_gpu_t *parent_gpu)
{
return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
return parent_gpu->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
}
// Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_is_virt_mode_sriov(const uvm_parent_gpu_t *parent_gpu)
{
return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu) ||
uvm_parent_gpu_is_virt_mode_sriov_standard(parent_gpu);
}
static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *parent_gpu)
{
return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
return uvm_parent_gpu_is_virt_mode_sriov_heavy(parent_gpu);
}
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
// Debug print of GPU properties
void uvm_gpu_print(uvm_gpu_t *gpu);
// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
// half GPU page fault handler uses this to look up the VA space for GPU faults.
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU
// faults.
NV_STATUS uvm_parent_gpu_add_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
void uvm_parent_gpu_remove_user_channel(uvm_parent_gpu_t *parent_gpu, uvm_user_channel_t *user_channel);
// Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
// NV_OK Translation successful
@ -1578,11 +1615,11 @@ void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channe
// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
// is responsibile for ensuring that the returned va_space can't be destroyed,
// so these functions should only be called from the bottom half.
NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
NV_STATUS uvm_parent_gpu_fault_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault,
uvm_va_space_t **out_va_space);
NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_buffer_entry_t *entry,
uvm_va_space_t **out_va_space);

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -67,21 +67,21 @@ static void access_counters_isr_bottom_half_entry(void *args);
// interrupts should be disabled. The caller is guaranteed that replayable page
// faults are disabled upon return. Interrupts might already be disabled prior
// to making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_replayable_faults_intr_enable().
// call to uvm_parent_gpu_replayable_faults_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
static void uvm_parent_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether replayable page fault
// interrupts should be disabled. Only once the count reaches 0 are the HW
// interrupts actually enabled, so this call does not guarantee that the
// interrupts have been re-enabled upon return.
//
// uvm_gpu_replayable_faults_intr_disable() must have been called prior to
// calling this function.
// uvm_parent_gpu_replayable_faults_intr_disable() must have been called prior
// to calling this function.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu);
static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
{
@ -108,7 +108,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled here to avoid an interrupt storm
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
// Schedule a bottom half, but do *not* release the GPU ISR lock. The bottom
// half releases the GPU ISR lock as part of its cleanup.
@ -175,7 +175,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_gpu_access_counters_intr_disable(parent_gpu);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
@ -288,7 +288,7 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
char kthread_name[TASK_COMM_LEN + 1];
@ -299,7 +299,7 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
return status;
}
@ -320,12 +320,12 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
parent_gpu->isr.replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_id_value(parent_gpu->id));
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u BH", uvm_parent_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.bottom_half_q, kthread_name, parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for bottom_half_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
return status;
}
@ -348,14 +348,14 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
parent_gpu->isr.non_replayable_faults.handling = true;
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_id_value(parent_gpu->id));
snprintf(kthread_name, sizeof(kthread_name), "UVM GPU%u KC", uvm_parent_id_value(parent_gpu->id));
status = init_queue_on_node(&parent_gpu->isr.kill_channel_q,
kthread_name,
parent_gpu->closest_cpu_numa_node);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed in nv_kthread_q_init for kill_channel_q: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
return status;
}
}
@ -365,7 +365,7 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
return status;
}
@ -393,13 +393,13 @@ NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
}
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
{
nv_kthread_q_flush(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_flush(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
@ -408,7 +408,7 @@ void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
// any more bottom halves.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
parent_gpu->isr.replayable_faults.was_handling = parent_gpu->isr.replayable_faults.handling;
parent_gpu->isr.non_replayable_faults.was_handling = parent_gpu->isr.non_replayable_faults.handling;
@ -428,7 +428,7 @@ void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
{
uvm_va_block_context_t *block_context;
@ -436,22 +436,23 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
if (parent_gpu->isr.replayable_faults.was_handling) {
// No user threads could have anything left on
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_gpu_replayable_faults_isr_lock/
// uvm_gpu_replayable_faults_isr_unlock. This means the
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_disable_intr above could only have raced
// with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_gpu_replayable_faults_isr_unlock, when it eventually
// reached uvm_gpu_replayable_faults_isr_unlock it would have skipped
// the disable, leaving us with extra ref counts here.
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
// eventually reached uvm_parent_gpu_replayable_faults_isr_unlock it
// would have skipped the disable, leaving us with extra ref counts
// here.
//
// In any case we're guaranteed that replayable faults interrupts are
// disabled and can't get re-enabled, so we can safely ignore the ref
// count value and just clean things up.
UVM_ASSERT_MSG(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0,
"%s replayable_faults.disable_intr_ref_count: %llu\n",
parent_gpu->name,
uvm_parent_gpu_name(parent_gpu),
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
@ -480,7 +481,6 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
{
uvm_global_gpu_id_t global_gpu_id = uvm_global_gpu_id_from_gpu_id(parent_gpu->id);
uvm_gpu_t *gpu;
// When SMC is enabled, there's no longer a 1:1 relationship between the
@ -495,10 +495,10 @@ static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);
sub_processor_index = find_first_bit(parent_gpu->valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
if (sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS) {
gpu = uvm_gpu_get(uvm_global_id_from_value(uvm_global_id_value(global_gpu_id) + sub_processor_index));
if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
gpu = parent_gpu->gpus[sub_processor_index];
UVM_ASSERT(gpu != NULL);
}
else {
@ -508,7 +508,7 @@ static uvm_gpu_t *find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
}
else {
gpu = uvm_gpu_get(global_gpu_id);
gpu = parent_gpu->gpus[0];
UVM_ASSERT(gpu != NULL);
}
@ -547,12 +547,12 @@ static void replayable_faults_isr_bottom_half(void *args)
uvm_gpu_service_replayable_faults(gpu);
uvm_gpu_replayable_faults_isr_unlock(parent_gpu);
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
put_kref:
// It is OK to drop a reference on the parent GPU if a bottom half has
// been retriggered within uvm_gpu_replayable_faults_isr_unlock, because the
// rescheduling added an additional reference.
// been retriggered within uvm_parent_gpu_replayable_faults_isr_unlock,
// because the rescheduling added an additional reference.
uvm_parent_gpu_kref_put(parent_gpu);
}
@ -573,7 +573,7 @@ static void non_replayable_faults_isr_bottom_half(void *args)
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
uvm_gpu_non_replayable_faults_isr_lock(parent_gpu);
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
// Multiple bottom halves for non-replayable faults can be running
// concurrently, but only one can enter this section for a given GPU
@ -586,7 +586,7 @@ static void non_replayable_faults_isr_bottom_half(void *args)
uvm_gpu_service_non_replayable_fault_buffer(gpu);
uvm_gpu_non_replayable_faults_isr_unlock(parent_gpu);
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
@ -622,7 +622,7 @@ static void access_counters_isr_bottom_half(void *args)
uvm_gpu_service_access_counters(gpu);
uvm_gpu_access_counters_isr_unlock(parent_gpu);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
put_kref:
uvm_parent_gpu_kref_put(parent_gpu);
@ -651,7 +651,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
//
// (1) UVM didn't process all the entries up to cached PUT
//
// (2) UVM did process all the entries up to cached PUT, but GPS-RM
// (2) UVM did process all the entries up to cached PUT, but GSP-RM
// added new entries such that cached PUT is out-of-date
//
// In both cases, re-enablement of interrupts would have caused the
@ -663,7 +663,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
// While in the typical case the retriggering happens within a replayable
// fault bottom half, it can also happen within a non-interrupt path such as
// uvm_gpu_fault_buffer_flush.
if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
if (g_uvm_global.conf_computing_enabled)
retrigger = true;
if (!retrigger)
@ -678,7 +678,7 @@ static void replayable_faults_retrigger_bottom_half(uvm_parent_gpu_t *parent_gpu
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
@ -687,7 +687,7 @@ void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
// Bump the disable ref count. This guarantees that the bottom half or
// another thread trying to take the replayable_faults.service_lock won't
// inadvertently re-enable interrupts during this locking sequence.
uvm_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_parent_gpu_replayable_faults_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
@ -696,7 +696,7 @@ void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
uvm_down(&parent_gpu->isr.replayable_faults.service_lock);
}
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
@ -733,9 +733,10 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
// Note that if we're in the bottom half and the GPU was removed before
// we checked replayable_faults.handling, we won't drop our interrupt
// disable ref count from the corresponding top-half call to
// uvm_gpu_replayable_faults_intr_disable. That's ok because remove_gpu
// ignores the refcount after waiting for the bottom half to finish.
uvm_gpu_replayable_faults_intr_enable(parent_gpu);
// uvm_parent_gpu_replayable_faults_intr_disable. That's ok because
// remove_gpu ignores the refcount after waiting for the bottom half to
// finish.
uvm_parent_gpu_replayable_faults_intr_enable(parent_gpu);
// Rearm pulse interrupts. This guarantees that the state of the pending
// interrupt is current and the top level rearm performed by RM is only
@ -762,42 +763,42 @@ void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
replayable_faults_retrigger_bottom_half(parent_gpu);
}
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_down(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
{
// See comments in uvm_gpu_replayable_faults_isr_lock
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_disable(parent_gpu);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
}
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_gpu_replayable_faults_isr_unlock
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_gpu_access_counters_intr_enable(parent_gpu);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
@ -811,7 +812,7 @@ void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
static void uvm_parent_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
@ -821,7 +822,7 @@ static void uvm_gpu_replayable_faults_intr_disable(uvm_parent_gpu_t *parent_gpu)
++parent_gpu->isr.replayable_faults.disable_intr_ref_count;
}
static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(parent_gpu->isr.replayable_faults.disable_intr_ref_count > 0);
@ -831,7 +832,7 @@ static void uvm_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *parent_gpu)
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
@ -848,7 +849,7 @@ void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
++parent_gpu->isr.access_counters.disable_intr_ref_count;
}
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -131,19 +131,19 @@ typedef struct
NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
// Prevent new bottom halves from being scheduled. This is called during parent
// GPU removal.
void uvm_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
// Destroy ISR handling state and return interrupt ownership to RM. This is
// called during parent GPU removal
void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
@ -151,46 +151,46 @@ void uvm_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// would cause an interrupt storm if we didn't disable them first.
//
// At least one GPU under the parent must have been previously retained.
void uvm_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_gpu_replayable_faults_isr_lock(), which should only called from
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_gpu_replayable_faults_isr_lock, no GPUs under
// the parent need to have been previously retained.
void uvm_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_gpu_replayable_faults_isr_lock/unlock
void uvm_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_gpu_access_counters_intr_enable().
// call to uvm_parent_gpu_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_gpu_access_counters_intr_disable() must have been called prior to calling
// this function.
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2022 NVIDIA Corporation
Copyright (c) 2017-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -196,7 +196,7 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceGetNonReplayableFaults() failed: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
uvm_global_set_fatal_error(status);
return status;
@ -235,17 +235,27 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
return NV_OK;
}
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
// the clearing on its behalf, using a SW method.
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
{
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return true;
}
// If true, UVM uses a SW method to request RM to do the clearing on its
// behalf.
bool use_sw_method = false;
return false;
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit.
if (uvm_parent_gpu_is_virt_mode_sriov(gpu->parent))
use_sw_method = true;
// In Confidential Computing access to the privileged registers is blocked,
// in order to prevent interference between guests, or between the
// (untrusted) host and the guests.
if (g_uvm_global.conf_computing_enabled)
use_sw_method = true;
if (use_sw_method)
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return use_sw_method;
}
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
@ -334,7 +344,8 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_fault_buffer_entry_t *fault_entry,
uvm_service_block_context_t *service_context)
uvm_service_block_context_t *service_context,
const bool hmm_migratable)
{
NV_STATUS status = NV_OK;
uvm_page_index_t page_index;
@ -400,6 +411,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS,
hmm_migratable,
&read_duplicate);
// Initialize the minimum necessary state in the fault service context
@ -431,7 +443,8 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_fault_buffer_entry_t *fault_entry)
uvm_fault_buffer_entry_t *fault_entry,
const bool hmm_migratable)
{
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
@ -440,10 +453,8 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_service_context_init(service_context);
if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
}
uvm_mutex_lock(&va_block->lock);
@ -452,7 +463,8 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
va_block,
&va_block_retry,
fault_entry,
service_context));
service_context,
hmm_migratable));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
&va_block->tracker);
@ -570,7 +582,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);
@ -619,7 +631,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
static NV_STATUS service_fault_once(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry, const bool hmm_migratable)
{
NV_STATUS status;
uvm_user_channel_t *user_channel;
@ -631,7 +643,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.non_replayable.block_service_context.block_context;
status = uvm_gpu_fault_entry_to_va_space(gpu, fault_entry, &va_space);
status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, fault_entry, &va_space);
if (status != NV_OK) {
// The VA space lookup will fail if we're running concurrently with
// removal of the channel from the VA space (channel unregister, GPU VA
@ -691,7 +703,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
&va_block);
}
if (status == NV_OK)
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry, hmm_migratable);
else
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
@ -708,21 +720,46 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
}
if (fault_entry->is_fatal)
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
uvm_tools_record_gpu_fatal_fault(gpu->id, fault_entry->va_space, fault_entry, fault_entry->fatal_reason);
if (status != NV_OK || fault_entry->is_fatal)
if (fault_entry->is_fatal ||
(status != NV_OK &&
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
status != NV_WARN_MISMATCHED_TARGET))
schedule_kill_channel(gpu, fault_entry, user_channel);
exit_no_channel:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
if (status != NV_OK)
if (status != NV_OK &&
status != NV_WARN_MORE_PROCESSING_REQUIRED &&
status != NV_WARN_MISMATCHED_TARGET)
UVM_DBG_PRINT("Error servicing non-replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
return status;
}
static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_entry)
{
uvm_service_block_context_t *service_context =
&gpu->parent->fault_buffer_info.non_replayable.block_service_context;
NV_STATUS status;
bool hmm_migratable = true;
service_context->num_retries = 0;
do {
status = service_fault_once(gpu, fault_entry, hmm_migratable);
if (status == NV_WARN_MISMATCHED_TARGET) {
hmm_migratable = false;
status = NV_WARN_MORE_PROCESSING_REQUIRED;
}
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
return status;
}
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu)
{
NvU32 cached_faults;

View File

@ -139,7 +139,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
parent_gpu->name,
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
@ -171,7 +171,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to take page fault ownership from RM: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
return status;
}
@ -181,7 +181,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
parent_gpu->name,
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
}
@ -189,7 +189,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
parent_gpu->name,
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
}
@ -238,7 +238,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
nvstatusToString(status),
parent_gpu->name);
uvm_parent_gpu_name(parent_gpu));
// nvUvmInterfaceInitFaultInfo may leave fields in rm_info populated
// when it returns an error. Set the buffer handle to zero as it is
@ -297,19 +297,6 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
}
}
// TODO: Bug 4098289: this function can be removed, and the calls to it replaced
// with calls to uvm_conf_computing_mode_enabled_parent, once UVM ownership is
// dictated by Confidential Computing enablement. Currently we support a
// non-production scenario in which Confidential Computing is enabled, but
// UVM still owns the replayable fault buffer.
bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu)
{
if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
return parent_gpu->fault_buffer_info.rm_info.replayable.bUvmOwnsHwFaultBuffer;
return true;
}
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
@ -362,7 +349,8 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
"Cancel targeting instance_ptr {0x%llx:%s}\n",
instance_ptr.address,
uvm_aperture_string(instance_ptr.aperture));
} else {
}
else {
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&replayable_faults->replay_tracker,
@ -559,13 +547,9 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
// fault_buffer_flush_locked is called, are now either flushed from the HW
// buffer, or are present in the shadow buffer and are about to be discarded
// too.
if (!uvm_conf_computing_mode_enabled_parent(parent_gpu))
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
// nvUvmInterfaceFlushReplayableFaultBuffer relies on the caller to ensure
// serialization for a given GPU.
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
// Flush the HW replayable buffer owned by GSP-RM.
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
@ -581,7 +565,7 @@ static void fault_buffer_skip_replayable_entry(uvm_parent_gpu_t *parent_gpu, NvU
// Flushed faults are never decrypted, but the decryption IV associated with
// replayable faults still requires manual adjustment so it is kept in sync
// with the encryption IV on the GSP-RM's side.
if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu))
if (g_uvm_global.conf_computing_enabled)
uvm_conf_computing_fault_increment_decrypt_iv(parent_gpu, 1);
parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);
@ -644,7 +628,7 @@ NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu)
UVM_ASSERT(gpu->parent->replayable_faults_supported);
// Disables replayable fault interrupts and fault servicing
uvm_gpu_replayable_faults_isr_lock(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
@ -653,7 +637,7 @@ NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu)
// This will trigger the top half to start servicing faults again, if the
// replay brought any back in
uvm_gpu_replayable_faults_isr_unlock(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
return status;
}
@ -990,7 +974,7 @@ static NV_STATUS translate_instance_ptrs(uvm_gpu_t *gpu,
continue;
}
status = uvm_gpu_fault_entry_to_va_space(gpu, current_entry, &current_entry->va_space);
status = uvm_parent_gpu_fault_entry_to_va_space(gpu->parent, current_entry, &current_entry->va_space);
if (status != NV_OK) {
if (status == NV_ERR_PAGE_TABLE_NOT_AVAIL) {
// The channel is valid but the subcontext is not. This can only
@ -1310,6 +1294,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_va_block_retry_t *va_block_retry,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
const bool hmm_migratable,
NvU32 *block_faults)
{
NV_STATUS status = NV_OK;
@ -1480,6 +1465,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
policy,
&thrashing_hint,
UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS,
hmm_migratable,
&read_duplicate);
if (!uvm_processor_mask_test_and_set(&block_context->resident_processors, new_residency))
@ -1532,6 +1518,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
uvm_va_block_t *va_block,
uvm_fault_service_batch_context_t *batch_context,
NvU32 first_fault_index,
const bool hmm_migratable,
NvU32 *block_faults)
{
NV_STATUS status;
@ -1553,6 +1540,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
&va_block_retry,
batch_context,
first_fault_index,
hmm_migratable,
block_faults));
tracker_status = uvm_tracker_add_tracker_safe(&batch_context->tracker, &va_block->tracker);
@ -1631,23 +1619,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
UVM_ASSERT(vma);
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
// Remove prefetched pages from the serviced mask since fault servicing
// failures belonging to prefetch pages need to be ignored.
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
(*block_faults) += (fault_index_end - fault_index_start);
return status;
}
@ -1679,7 +1667,8 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
if (access_type <= UVM_FAULT_ACCESS_TYPE_READ) {
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
else if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) {
else {
UVM_ASSERT(access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
if (uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ) &&
!uvm_page_mask_test(reads_serviced_mask, page_index))
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
@ -1856,7 +1845,8 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_fault_service_batch_context_t *batch_context,
NvU32 fault_index,
NvU32 *block_faults,
bool replay_per_va_block)
bool replay_per_va_block,
const bool hmm_migratable)
{
NV_STATUS status;
uvm_va_range_t *va_range = NULL;
@ -1887,7 +1877,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
status = NV_ERR_INVALID_ADDRESS;
if (status == NV_OK) {
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, block_faults);
status = service_fault_batch_block(gpu, va_block, batch_context, fault_index, hmm_migratable, block_faults);
}
else if ((status == NV_ERR_INVALID_ADDRESS) && uvm_ats_can_service_faults(gpu_va_space, mm)) {
NvU64 outer = ~0ULL;
@ -1991,9 +1981,6 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
//
// TODO: Bug 2533557: This flush does not actually guarantee that GSP will
// copy over all faults.
status = hw_fault_buffer_flush_locked(gpu->parent);
if (status != NV_OK)
goto done;
@ -2064,19 +2051,19 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;
ats_invalidate->write_faults_in_batch = false;
uvm_hmm_service_context_init(service_context);
ats_invalidate->tlb_batch_pending = false;
// Service all the faults that we can. We only really need to search
// for fatal faults, but attempting to service all is the easiest
// way to do that.
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false, hmm_migratable);
if (status != NV_OK) {
// TODO: Bug 3900733: clean up locking in service_fault_batch().
// We need to drop lock and retry. That means flushing and
// starting over.
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
status = NV_OK;
break;
@ -2148,11 +2135,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
uvm_service_block_context_t *service_context =
&gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
ats_invalidate->write_faults_in_batch = false;
uvm_hmm_service_context_init(service_context);
ats_invalidate->tlb_batch_pending = false;
for (i = 0; i < batch_context->num_coalesced_faults;) {
NvU32 block_faults;
@ -2215,9 +2202,12 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
batch_context,
i,
&block_faults,
replay_per_va_block);
replay_per_va_block,
hmm_migratable);
// TODO: Bug 3900733: clean up locking in service_fault_batch().
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET) {
if (status == NV_WARN_MISMATCHED_TARGET)
hmm_migratable = false;
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
mm = NULL;
@ -2229,6 +2219,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
if (status != NV_OK)
goto fail;
hmm_migratable = true;
i += block_faults;
// Don't issue replays in cancel mode
@ -2348,7 +2339,7 @@ static void record_fatal_fault_helper(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *
UVM_ASSERT(va_space);
uvm_va_space_down_read(va_space);
// Record fatal fault event
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, va_space, entry, reason);
uvm_tools_record_gpu_fatal_fault(gpu->id, va_space, entry, reason);
uvm_va_space_up_read(va_space);
}
@ -2538,10 +2529,10 @@ static void cancel_fault_batch_tlb(uvm_gpu_t *gpu,
// The list iteration below skips the entry used as 'head'.
// Report the 'head' entry explicitly.
uvm_va_space_down_read(current_entry->va_space);
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, current_entry->va_space, current_entry, reason);
uvm_tools_record_gpu_fatal_fault(gpu->id, current_entry->va_space, current_entry, reason);
list_for_each_entry(coalesced_entry, &current_entry->merged_instances_list, merged_instances_list)
uvm_tools_record_gpu_fatal_fault(gpu->parent->id, current_entry->va_space, coalesced_entry, reason);
uvm_tools_record_gpu_fatal_fault(gpu->id, current_entry->va_space, coalesced_entry, reason);
uvm_va_space_up_read(current_entry->va_space);
// We need to cancel each instance pointer to correctly handle faults from multiple contexts.
@ -2948,9 +2939,9 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
uvm_spin_loop_init(&spin);
do {
uvm_gpu_replayable_faults_isr_lock(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
uvm_gpu_replayable_faults_isr_unlock(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (!pending)
break;

View File

@ -74,8 +74,4 @@ void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half
void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu);
// Returns true if UVM owns the hardware replayable fault buffer
bool uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_PAGE_FAULT_H__

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -72,7 +72,7 @@ struct uvm_gpu_semaphore_pool_page_struct
static bool gpu_semaphore_pool_is_secure(uvm_gpu_semaphore_pool_t *pool)
{
return uvm_conf_computing_mode_enabled(pool->gpu) && (pool->aperture == UVM_APERTURE_VID);
return g_uvm_global.conf_computing_enabled && (pool->aperture == UVM_APERTURE_VID);
}
static bool gpu_semaphore_is_secure(uvm_gpu_semaphore_t *semaphore)
@ -366,7 +366,7 @@ NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore
{
NV_STATUS status;
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = uvm_gpu_semaphore_pool_create(gpu, pool_out);
if (status == NV_OK)
@ -498,7 +498,7 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
// those cases.
//
// But if a pointer is in the table it must match.
table_gpu = uvm_gpu_get(gpu->global_id);
table_gpu = uvm_gpu_get(gpu->id);
if (table_gpu)
UVM_ASSERT(table_gpu == gpu);
@ -509,16 +509,11 @@ static bool tracking_semaphore_check_gpu(uvm_gpu_tracking_semaphore_t *tracking_
bool tracking_semaphore_uses_mutex(uvm_gpu_tracking_semaphore_t *tracking_semaphore)
{
uvm_gpu_t *gpu = tracking_semaphore->semaphore.page->pool->gpu;
UVM_ASSERT(tracking_semaphore_check_gpu(tracking_semaphore));
if (uvm_conf_computing_mode_enabled(gpu))
return true;
return false;
return g_uvm_global.conf_computing_enabled;
}
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem)
{
NV_STATUS status;
@ -532,7 +527,7 @@ NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_g
UVM_ASSERT(uvm_gpu_semaphore_get_payload(&tracking_sem->semaphore) == 0);
if (uvm_conf_computing_mode_enabled(pool->gpu))
if (g_uvm_global.conf_computing_enabled)
order = UVM_LOCK_ORDER_SECURE_SEMAPHORE;
if (tracking_semaphore_uses_mutex(tracking_sem))
@ -579,9 +574,8 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
void *auth_tag_cpu_addr = uvm_rm_mem_get_cpu_va(semaphore->conf_computing.auth_tag);
NvU32 *gpu_notifier_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.notifier);
NvU32 *payload_cpu_addr = (NvU32 *)uvm_rm_mem_get_cpu_va(semaphore->conf_computing.encrypted_payload);
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
UVM_ASSERT(uvm_conf_computing_mode_enabled(gpu));
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
UVM_ASSERT(uvm_channel_is_ce(channel));
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
@ -695,7 +689,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
// Check for unexpected large jumps of the semaphore value
UVM_ASSERT_MSG_RELEASE(new_value - old_value <= UVM_GPU_SEMAPHORE_MAX_JUMP,
"GPU %s unexpected semaphore (CPU VA 0x%llx) jump from 0x%llx to 0x%llx\n",
tracking_semaphore->semaphore.page->pool->gpu->parent->name,
uvm_gpu_name(tracking_semaphore->semaphore.page->pool->gpu),
(NvU64)(uintptr_t)tracking_semaphore->semaphore.payload,
old_value, new_value);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -185,7 +185,7 @@ static NV_STATUS test_alloc(uvm_va_space_t *va_space)
// In SR-IOV heavy, there should be a mapping in the proxy VA space
// too.
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
if (uvm_parent_gpu_needs_proxy_channel_pool(gpu->parent)) {
gpu_va = uvm_gpu_semaphore_get_gpu_proxy_va(&semaphores[i], gpu);
TEST_CHECK_GOTO(gpu_va != 0, done);
}

View File

@ -22,6 +22,7 @@
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_kvmalloc.h"
#include "cla16f.h"
@ -680,7 +681,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
class_ops = ops_find_by_id(ce_table, ARRAY_SIZE(ce_table), gpu_info->ceClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n", gpu_info->ceClass, parent_gpu->name);
UVM_ERR_PRINT("Unsupported ce class: 0x%X, GPU %s\n",
gpu_info->ceClass,
uvm_parent_gpu_name(parent_gpu));
return NV_ERR_INVALID_CLASS;
}
@ -688,7 +691,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
class_ops = ops_find_by_id(host_table, ARRAY_SIZE(host_table), gpu_info->hostClass);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n", gpu_info->hostClass, parent_gpu->name);
UVM_ERR_PRINT("Unsupported host class: 0x%X, GPU %s\n",
gpu_info->hostClass,
uvm_parent_gpu_name(parent_gpu));
return NV_ERR_INVALID_CLASS;
}
@ -696,7 +701,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
class_ops = ops_find_by_id(arch_table, ARRAY_SIZE(arch_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n", gpu_info->gpuArch, parent_gpu->name);
UVM_ERR_PRINT("Unsupported GPU architecture: 0x%X, GPU %s\n",
gpu_info->gpuArch,
uvm_parent_gpu_name(parent_gpu));
return NV_ERR_INVALID_CLASS;
}
@ -704,7 +711,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
class_ops = ops_find_by_id(fault_buffer_table, ARRAY_SIZE(fault_buffer_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
UVM_ERR_PRINT("Fault buffer HAL not found, GPU %s, arch: 0x%X\n",
uvm_parent_gpu_name(parent_gpu),
gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
@ -714,7 +723,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
ARRAY_SIZE(access_counter_buffer_table),
gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
UVM_ERR_PRINT("Access counter HAL not found, GPU %s, arch: 0x%X\n",
uvm_parent_gpu_name(parent_gpu),
gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
@ -722,7 +733,9 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
class_ops = ops_find_by_id(sec2_table, ARRAY_SIZE(sec2_table), gpu_info->gpuArch);
if (class_ops == NULL) {
UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n", parent_gpu->name, gpu_info->gpuArch);
UVM_ERR_PRINT("SEC2 HAL not found, GPU %s, arch: 0x%X\n",
uvm_parent_gpu_name(parent_gpu),
gpu_info->gpuArch);
return NV_ERR_INVALID_CLASS;
}
@ -736,11 +749,16 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// Access counters are currently not supported in vGPU.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) {
parent_gpu->access_counters_supported = false;
// Access counters are not supported in CC.
else if (uvm_conf_computing_mode_enabled_parent(parent_gpu))
parent_gpu->access_counters_can_use_physical_addresses = false;
}
// Access counters are not supported in Confidential Computing.
else if (g_uvm_global.conf_computing_enabled) {
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
}
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)

View File

@ -292,9 +292,9 @@ typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);
// CE method validation
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

View File

@ -112,7 +112,20 @@ typedef struct
bool uvm_hmm_is_enabled_system_wide(void)
{
return !uvm_disable_hmm && !g_uvm_global.ats.enabled && uvm_va_space_mm_enabled_system();
if (uvm_disable_hmm)
return false;
if (g_uvm_global.ats.enabled)
return false;
// Confidential Computing and HMM impose mutually exclusive constraints. In
// Confidential Computing the GPU can only access pages resident in vidmem,
// but in HMM pages may be required to be resident in sysmem: file backed
// VMAs, huge pages, etc.
if (g_uvm_global.conf_computing_enabled)
return false;
return uvm_va_space_mm_enabled_system();
}
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@ -132,8 +145,9 @@ static uvm_va_block_t *hmm_va_block_from_node(uvm_range_tree_node_t *node)
// Copies the contents of the source device-private page to the
// destination CPU page. This will invalidate mappings, so cannot be
// called while holding any va_block locks.
static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *src_page, uvm_tracker_t *tracker)
static void hmm_copy_devmem_page(struct page *dst_page, struct page *src_page)
{
uvm_tracker_t tracker = UVM_TRACKER_INIT();
uvm_gpu_phys_address_t src_addr;
uvm_gpu_phys_address_t dst_addr;
uvm_gpu_chunk_t *gpu_chunk;
@ -152,9 +166,9 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
gpu = uvm_gpu_chunk_get_gpu(gpu_chunk);
status = uvm_mmu_chunk_map(gpu_chunk);
if (status != NV_OK)
return status;
goto out_zero;
status = uvm_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
status = uvm_parent_gpu_map_cpu_pages(gpu->parent, dst_page, PAGE_SIZE, &dma_addr);
if (status != NV_OK)
goto out_unmap_gpu;
@ -162,7 +176,7 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
src_addr = uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_chunk->address);
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
tracker,
&tracker,
&push,
"Copy for remote process fault");
if (status != NV_OK)
@ -173,15 +187,23 @@ static NV_STATUS uvm_hmm_copy_devmem_page(struct page *dst_page, struct page *sr
uvm_gpu_address_copy(gpu, src_addr),
PAGE_SIZE);
uvm_push_end(&push);
status = uvm_tracker_add_push_safe(tracker, &push);
status = uvm_tracker_add_push_safe(&tracker, &push);
if (status == NV_OK)
uvm_tracker_wait_deinit(&tracker);
out_unmap_cpu:
uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
uvm_parent_gpu_unmap_cpu_pages(gpu->parent, dma_addr, PAGE_SIZE);
out_unmap_gpu:
uvm_mmu_chunk_unmap(gpu_chunk, NULL);
return status;
out_zero:
// We can't fail eviction because we need to free the device-private pages
// so the GPU can be unregistered. So the best we can do is warn on any
// failures and zero the uninitialised page. This could result in data loss
// in the application but failures are not expected.
if (WARN_ON(status != NV_OK))
memzero_page(dst_page, 0, PAGE_SIZE);
}
static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
@ -197,7 +219,6 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
return errno_to_nv_status(ret);
if (src_pfn & MIGRATE_PFN_MIGRATE) {
uvm_tracker_t tracker = UVM_TRACKER_INIT();
dst_page = alloc_page(GFP_HIGHUSER_MOVABLE);
if (!dst_page) {
@ -206,12 +227,9 @@ static NV_STATUS uvm_hmm_pmm_gpu_evict_pfn(unsigned long pfn)
}
lock_page(dst_page);
if (WARN_ON(uvm_hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn), &tracker) != NV_OK))
memzero_page(dst_page, 0, PAGE_SIZE);
hmm_copy_devmem_page(dst_page, migrate_pfn_to_page(src_pfn));
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
migrate_device_pages(&src_pfn, &dst_pfn, 1);
uvm_tracker_wait_deinit(&tracker);
}
out:
@ -674,12 +692,6 @@ bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
return true;
}
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
{
// TODO: Bug 4050579: Remove this when swap cached pages can be migrated.
service_context->block_context->hmm.swap_cached = false;
}
NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
{
if (uvm_mutex_trylock(&va_block->hmm.migrate_lock))
@ -1072,6 +1084,7 @@ done:
static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t preferred_location,
int preferred_cpu_nid,
NvU64 addr,
NvU64 end,
uvm_tracker_t *out_tracker)
@ -1085,10 +1098,10 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
// Note that we can't just call uvm_va_policy_set_range() for the whole
// range [addr end] because we need to examine the old value of
// policy->preferred_location before setting it. Thus we iterate over
// the existing policy nodes.
// policy->preferred_location and policy->preferred_nid before setting it.
// Thus we iterate over the existing policy nodes.
uvm_for_each_va_policy_in(old_policy, va_block, addr, end, node, region) {
if (uvm_id_equal(old_policy->preferred_location, preferred_location))
if (uvm_va_policy_preferred_location_equal(old_policy, preferred_location, preferred_cpu_nid))
continue;
// If the old preferred location is a valid processor ID, remote
@ -1100,7 +1113,11 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
if (!uvm_va_policy_set_preferred_location(va_block, region, preferred_location, old_policy))
if (!uvm_va_policy_set_preferred_location(va_block,
region,
preferred_location,
preferred_cpu_nid,
old_policy))
return NV_ERR_NO_MEMORY;
// Establish new remote mappings if the old preferred location had
@ -1134,6 +1151,7 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
int preferred_cpu_nid,
NvU64 base,
NvU64 last_address,
uvm_tracker_t *out_tracker)
@ -1170,6 +1188,7 @@ NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
status = hmm_set_preferred_location_locked(va_block,
va_block_context,
preferred_location,
preferred_cpu_nid,
addr,
end,
out_tracker);
@ -1259,6 +1278,7 @@ NV_STATUS uvm_hmm_set_accessed_by(uvm_va_space_t *va_space,
UVM_VA_POLICY_ACCESSED_BY,
!set_bit,
processor_id,
NUMA_NO_NODE,
UVM_READ_DUPLICATION_MAX);
if (status == NV_OK && set_bit) {
@ -1968,28 +1988,74 @@ static void fill_dst_pfns(uvm_va_block_t *va_block,
}
}
static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
struct vm_area_struct *vma,
static NV_STATUS alloc_page_on_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
const unsigned long *src_pfns,
unsigned long *dst_pfns,
uvm_page_mask_t *same_devmem_page_mask,
uvm_va_block_context_t *block_context)
{
NV_STATUS status;
struct page *src_page;
struct page *dst_page;
// This is the page that will be copied to system memory.
src_page = migrate_pfn_to_page(src_pfns[page_index]);
if (src_page) {
// mremap may have caused us to lose the gpu_chunk associated with
// this va_block/page_index so make sure we have the correct chunk.
if (is_device_private_page(src_page))
gpu_chunk_add(va_block, page_index, src_page);
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
lock_block_cpu_page(va_block, page_index, src_page, dst_pfns, same_devmem_page_mask);
return NV_OK;
}
}
UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
!uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
status = uvm_va_block_populate_page_cpu(va_block, page_index, block_context);
if (status != NV_OK)
return status;
// TODO: Bug 3368756: add support for transparent huge pages
// Support for large CPU pages means the page_index may need fixing
dst_page = migrate_pfn_to_page(block_context->hmm.dst_pfns[page_index]);
// Note that we don't call get_page(dst_page) since alloc_page_vma()
// returns with a page reference count of one and we are passing
// ownership to Linux. Also, uvm_va_block_cpu_page_populate() recorded
// the page as "mirrored" so that migrate_vma_finalize() and
// hmm_va_block_cpu_page_unpopulate() don't double free the page.
lock_page(dst_page);
dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page));
return NV_OK;
}
// Allocates pages on the CPU to handle migration due to a page fault
static NV_STATUS fault_alloc_on_cpu(uvm_va_block_t *va_block,
const unsigned long *src_pfns,
unsigned long *dst_pfns,
uvm_va_block_region_t region,
uvm_page_mask_t *page_mask,
uvm_page_mask_t *same_devmem_page_mask,
uvm_processor_id_t processor_id,
uvm_processor_id_t fault_processor_id,
uvm_service_block_context_t *service_context)
{
uvm_page_index_t page_index;
NV_STATUS status = NV_OK;
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
struct page *src_page;
struct page *dst_page;
gfp_t gfp;
UVM_ASSERT(service_context);
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) {
// Device exclusive PTEs are not selected but we still want to
// process the page so record it as such.
if (service_context && !UVM_ID_IS_CPU(processor_id) &&
if (!UVM_ID_IS_CPU(fault_processor_id) &&
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG) {
uvm_page_mask_set(same_devmem_page_mask, page_index);
continue;
@ -2004,73 +2070,19 @@ static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
goto clr_mask;
}
// This is the page that will be copied to system memory.
src_page = migrate_pfn_to_page(src_pfns[page_index]);
if (src_page) {
// mremap may have caused us to loose the gpu_chunk associated with
// this va_block/page_index so make sure we have the correct chunk.
if (is_device_private_page(src_page))
gpu_chunk_add(va_block, page_index, src_page);
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
lock_block_cpu_page(va_block, page_index, src_page, dst_pfns, same_devmem_page_mask);
continue;
}
}
UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) ||
!uvm_va_block_cpu_is_page_resident_on(va_block, NUMA_NO_NODE, page_index));
// Allocate a user system memory page for the destination.
// This is the typical case since Linux will free the source page when
// migrating to device private memory.
// If there is no source page, it means the page is pte_none() or the
// zero page. This case "shouldn't happen" because we asked
// migrate_vma_setup() only for device private pages but
// migrate_vma_collect_hole() doesn't check the
// MIGRATE_VMA_SELECT_SYSTEM flag.
gfp = GFP_HIGHUSER_MOVABLE;
if (!src_page)
gfp |= __GFP_ZERO;
dst_page = alloc_page_vma(gfp,
vma,
va_block->start + (page_index << PAGE_SHIFT));
if (!dst_page) {
// Ignore errors if the page is only for prefetching.
if (service_context &&
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_PREFETCH)
goto clr_mask;
UVM_ERR_PRINT("cannot allocate page %u (addr 0x%llx)\n",
page_index, va_block->start + (page_index << PAGE_SHIFT));
status = NV_ERR_NO_MEMORY;
break;
}
status = hmm_va_block_cpu_page_populate(va_block, page_index, dst_page);
status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, service_context->block_context);
if (status != NV_OK) {
__free_page(dst_page);
// Ignore errors if the page is only for prefetching.
if (service_context &&
service_context->access_type[page_index] == UVM_FAULT_ACCESS_TYPE_PREFETCH)
goto clr_mask;
break;
}
// Note that we don't call get_page(dst_page) since alloc_page_vma()
// returns with a page reference count of one and we are passing
// ownership to Linux. Also, uvm_va_block_cpu_page_populate() recorded
// the page as "mirrored" so that migrate_vma_finalize() and
// hmm_va_block_cpu_page_unpopulate() don't double free the page.
lock_page(dst_page);
dst_pfns[page_index] = migrate_pfn(page_to_pfn(dst_page));
continue;
clr_mask:
// TODO: Bug 3900774: clean up murky mess of mask clearing.
uvm_page_mask_clear(page_mask, page_index);
if (service_context)
clear_service_context_masks(service_context, UVM_ID_CPU, page_index);
}
@ -2082,6 +2094,40 @@ static NV_STATUS alloc_and_copy_to_cpu(uvm_va_block_t *va_block,
return status;
}
// Allocates pages on the CPU for explicit migration calls.
static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block,
const unsigned long *src_pfns,
unsigned long *dst_pfns,
uvm_va_block_region_t region,
uvm_page_mask_t *page_mask,
uvm_page_mask_t *same_devmem_page_mask,
uvm_va_block_context_t *block_context)
{
uvm_page_index_t page_index;
NV_STATUS status = NV_OK;
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
if (!(src_pfns[page_index] & MIGRATE_PFN_MIGRATE)) {
// We have previously found a page that is CPU resident which can't
// be migrated (probably a shared mapping) so make sure we establish
// a remote mapping for it.
if (uvm_page_mask_test(same_devmem_page_mask, page_index))
continue;
uvm_page_mask_clear(page_mask, page_index);
continue;
}
status = alloc_page_on_cpu(va_block, page_index, src_pfns, dst_pfns, same_devmem_page_mask, block_context);
}
if (status != NV_OK)
clean_up_non_migrating_pages(va_block, src_pfns, dst_pfns, region, page_mask);
else if (uvm_page_mask_empty(page_mask))
return NV_WARN_MORE_PROCESSING_REQUIRED;
return status;
}
static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
{
uvm_processor_id_t processor_id;
@ -2107,8 +2153,7 @@ static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_contex
page_mask = &devmem_fault_context->page_mask;
uvm_page_mask_copy(page_mask, &service_context->per_processor_masks[UVM_ID_CPU_VALUE].new_residency);
status = alloc_and_copy_to_cpu(va_block,
service_context->block_context->hmm.vma,
status = fault_alloc_on_cpu(va_block,
src_pfns,
dst_pfns,
service_context->region,
@ -2640,14 +2685,10 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
if (PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
if (service_context) {
service_context->block_context->hmm.swap_cached = true;
status = NV_WARN_MISMATCHED_TARGET;
break;
}
goto clr_mask;
}
// If the page is already allocated, it is most likely a mirrored
// page. Check to be sure it matches what we have recorded. The
// page shouldn't be a staging page from a GPU to GPU migration
@ -2699,8 +2740,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
clear_service_context_masks(service_context, dest_id, page_index);
}
if (uvm_page_mask_empty(page_mask) ||
(service_context && service_context->block_context->hmm.swap_cached))
if (uvm_page_mask_empty(page_mask))
status = NV_WARN_MORE_PROCESSING_REQUIRED;
if (status != NV_OK)
@ -2945,15 +2985,13 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
uvm_assert_mutex_locked(&va_block->lock);
if (UVM_ID_IS_CPU(dest_id)) {
status = alloc_and_copy_to_cpu(va_block,
vma,
status = migrate_alloc_on_cpu(va_block,
src_pfns,
dst_pfns,
region,
page_mask,
&uvm_hmm_migrate_event->same_devmem_page_mask,
UVM_ID_INVALID,
NULL);
va_block_context);
}
else {
status = dmamap_src_sysmem_pages(va_block,
@ -3154,7 +3192,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
migrate_vma_finalize(args);
if (status == NV_WARN_NOTHING_TO_DO)
if (status == NV_WARN_NOTHING_TO_DO || status == NV_WARN_MISMATCHED_TARGET)
status = NV_OK;
return status;
@ -3288,15 +3326,13 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
// TODO: Bug 3660922: Need to handle read duplication at some point.
UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));
status = alloc_and_copy_to_cpu(va_block,
NULL,
status = migrate_alloc_on_cpu(va_block,
src_pfns,
dst_pfns,
region,
page_mask,
NULL,
UVM_ID_INVALID,
NULL);
va_block_context);
if (status != NV_OK)
goto err;
@ -3392,7 +3428,6 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
unsigned long dst_pfn;
struct migrate_vma args;
struct page *src_page = vmf->page;
uvm_tracker_t tracker = UVM_TRACKER_INIT();
int ret;
args.vma = vmf->vma;
@ -3421,9 +3456,7 @@ NV_STATUS uvm_hmm_remote_cpu_fault(struct vm_fault *vmf)
lock_page(dst_page);
dst_pfn = migrate_pfn(page_to_pfn(dst_page));
status = uvm_hmm_copy_devmem_page(dst_page, src_page, &tracker);
if (status == NV_OK)
status = uvm_tracker_wait_deinit(&tracker);
hmm_copy_devmem_page(dst_page, src_page);
}
migrate_vma_pages(&args);
@ -3591,6 +3624,7 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
params->va_range_end = ULONG_MAX;
params->read_duplication = UVM_TEST_READ_DUPLICATION_UNSET;
memset(&params->preferred_location, 0, sizeof(params->preferred_location));
params->preferred_cpu_nid = NUMA_NO_NODE;
params->accessed_by_count = 0;
params->managed.vma_start = 0;
params->managed.vma_end = 0;
@ -3633,8 +3667,10 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
params->read_duplication = node->policy.read_duplication;
if (!UVM_ID_IS_INVALID(node->policy.preferred_location))
if (!UVM_ID_IS_INVALID(node->policy.preferred_location)) {
uvm_va_space_processor_uuid(va_space, &params->preferred_location, node->policy.preferred_location);
params->preferred_cpu_nid = node->policy.preferred_nid;
}
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
uvm_va_space_processor_uuid(va_space, &params->accessed_by[params->accessed_by_count++], processor_id);
@ -3652,22 +3688,16 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is implemented
// for VMAs other than anonymous private memory.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
struct vm_area_struct *vma)
{
struct vm_area_struct *vma = va_block_context->hmm.vma;
uvm_assert_mutex_locked(&va_block->lock);
if (!uvm_va_block_is_hmm(va_block))
return false;
UVM_ASSERT(vma);
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(va_block_context->mm);
// TODO: Bug 4050579: Remove this when swap cached pages can be migrated.
if (va_block_context->hmm.swap_cached)
return true;
UVM_ASSERT(va_block->hmm.va_space->va_space_mm.mm == vma->vm_mm);
uvm_assert_mmap_lock_locked(vma->vm_mm);
// migrate_vma_setup() can't migrate VM_SPECIAL so we have to force GPU
// remote mapping.

View File

@ -114,11 +114,6 @@ typedef struct
struct vm_area_struct *vma,
uvm_va_block_region_t region);
// Initialize the HMM portion of the service_context.
// This should be called one time before any retry loops calling
// uvm_va_block_service_locked().
void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context);
// Begin a migration critical section. When calling into the kernel it is
// sometimes necessary to drop the va_block lock. This function returns
// NV_OK when no other thread has started a migration critical section.
@ -183,6 +178,7 @@ typedef struct
// and the va_space lock must be held in write mode.
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
int preferred_cpu_nid,
NvU64 base,
NvU64 last_address,
uvm_tracker_t *out_tracker);
@ -271,6 +267,18 @@ typedef struct
NvU64 addr);
// This is called to service a GPU fault.
// processor_id is the faulting processor.
// new_residency is the processor where the data should be migrated to.
// Special return values (besides things like NV_ERR_NO_MEMORY):
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
// not be migrated and that a retry might succeed after unlocking the
// va_block lock, va_space lock, and mmap lock.
// NV_WARN_MISMATCHED_TARGET is a special case of GPU fault handling when a
// GPU is chosen as the destination and the source is a HMM CPU page that
// can't be migrated (i.e., must remain in system memory). In that case,
// uvm_va_block_select_residency() should be called with 'hmm_migratable'
// set to false so that system memory will be selected. Then this call can
// be retried to service the GPU fault by migrating to system memory.
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
// the va_space read lock must be held, and the va_block lock held.
NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
@ -282,8 +290,10 @@ typedef struct
// This is called to migrate a region within a HMM va_block.
// va_block_context must not be NULL and va_block_context->hmm.vma
// must be valid.
// Locking: the va_space->va_space_mm.mm must be retained, mmap_lock must be
// locked, and the va_block lock held.
// Special return values (besides things like NV_ERR_NO_MEMORY):
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
// not be migrated and that a retry might succeed after unlocking the
// va_block lock, va_space lock, and mmap lock.
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
@ -382,7 +392,7 @@ typedef struct
// va_block, the va_block_context->mm must be retained and locked for least
// read.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
struct vm_area_struct *vma);
#else // UVM_IS_CONFIG_HMM()
@ -441,10 +451,6 @@ typedef struct
return true;
}
static void uvm_hmm_service_context_init(uvm_service_block_context_t *service_context)
{
}
static NV_STATUS uvm_hmm_migrate_begin(uvm_va_block_t *va_block)
{
return NV_OK;
@ -485,6 +491,7 @@ typedef struct
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
uvm_processor_id_t preferred_location,
int preferred_cpu_nid,
NvU64 base,
NvU64 last_address,
uvm_tracker_t *out_tracker)
@ -648,7 +655,7 @@ typedef struct
}
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context)
struct vm_area_struct *vma)
{
return false;
}

View File

@ -55,7 +55,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (8 * UVM_SIZE_1TB);
parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);
// Physical CE writes to vidmem are non-coherent with respect to the CPU on
// GH180.
@ -88,6 +88,8 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
@ -103,5 +105,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
}
parent_gpu->no_ats_range_required = true;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Copyright (c) 2020-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -22,6 +22,7 @@
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_global.h"
#include "uvm_push.h"
#include "uvm_mem.h"
#include "uvm_conf_computing.h"
@ -154,7 +155,8 @@ static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t ds
static bool va_is_flat_vidmem(uvm_gpu_t *gpu, NvU64 va)
{
return (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu)) &&
return (uvm_mmu_parent_gpu_needs_static_vidmem_mapping(gpu->parent) ||
uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent)) &&
va >= gpu->parent->flat_vidmem_va_base &&
va < gpu->parent->flat_vidmem_va_base + UVM_GPU_MAX_PHYS_MEM;
}
@ -180,17 +182,18 @@ static bool hopper_scrub_enable(uvm_gpu_t *gpu, uvm_gpu_address_t *dst, size_t s
return !dst->is_virtual && dst->aperture == UVM_APERTURE_VID;
}
static NvU32 hopper_memset_copy_type(uvm_push_t *push, uvm_gpu_address_t dst)
static NvU32 hopper_memset_copy_type(uvm_gpu_address_t dst)
{
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected)
if (g_uvm_global.conf_computing_enabled && dst.is_unprotected)
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
}
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
if (uvm_conf_computing_mode_enabled(uvm_push_get_gpu(push)) && dst.is_unprotected && src.is_unprotected)
if (g_uvm_global.conf_computing_enabled && dst.is_unprotected && src.is_unprotected)
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, NONPROT2NONPROT);
return HWCONST(C8B5, LAUNCH_DMA, COPY_TYPE, DEFAULT);
}
@ -210,7 +213,7 @@ static void hopper_memset_common(uvm_push_t *push,
NvU32 launch_dma_remap_enable;
NvU32 launch_dma_scrub_enable;
NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NvU32 copy_type_value = hopper_memset_copy_type(push, dst);
NvU32 copy_type_value = hopper_memset_copy_type(dst);
bool is_scrub = hopper_scrub_enable(gpu, &dst, num_elements * memset_element_size);
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, num_elements, memset_element_size),

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Copyright (c) 2020-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,6 +33,7 @@
#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_common.h"
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "uvm_hopper_fault_buffer.h"
@ -42,6 +43,10 @@
#define MMU_BIG 0
#define MMU_SMALL 1
// Used in pde_pcf().
#define ATS_ALLOWED 0
#define ATS_NOT_ALLOWED 1
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
@ -260,7 +265,108 @@ static NvU64 poisoned_pte_hopper(void)
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
// Row selector for the PCF tables below: a single PDE, or the big/small
// half of a dual PDE.
typedef enum
{
    PDE_TYPE_SINGLE,
    PDE_TYPE_DUAL_BIG,
    PDE_TYPE_DUAL_SMALL,
    PDE_TYPE_COUNT,
} pde_type_t;

// PCF (permission control flags) encodings for valid PDEs, indexed as
// [pde_type][ATS_ALLOWED or ATS_NOT_ALLOWED].
static const NvU8 valid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED,
                                       NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED },
                                     { NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED,
                                       NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED },
                                     { NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED,
                                       NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED } };

// PCF encodings for invalid (not-present) PDEs; same indexing as valid_pcf.
static const NvU8 invalid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED,
                                         NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED },
                                       { NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED,
                                         NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED },
                                       { NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED,
                                         NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED } };

// Shift giving the VA span of one entry at each page-table depth: an entry
// at depth d covers (1ULL << va_base[d]) bytes of VA (see
// is_ats_range_valid(), which accumulates index * (1ull << va_base[depth])).
// Depth 0 is the root (PDE4, per the comment in pde_pcf()).
static const NvU8 va_base[] = { 56, 47, 38, 29, 21 };
// Compute whether the VA region covered by entry child_index of directory
// dir falls outside the CPU-unaddressable gap [max_va_lower, min_va_upper),
// i.e. the entry's sign-extended base VA is in the CPU-addressable canonical
// range. The caller (pde_pcf) uses this to keep ATS_ALLOWED on invalid PDEs
// covering such ranges.
static bool is_ats_range_valid(uvm_page_directory_t *dir, NvU32 child_index)
{
    NvU64 entry_base_va = 0;
    NvU64 min_va_upper;
    NvU64 max_va_lower;
    NvU32 level_index = child_index;
    uvm_page_directory_t *level;

    uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);

    UVM_ASSERT(dir->depth < ARRAY_SIZE(va_base));

    // We can use UVM_PAGE_SIZE_AGNOSTIC because page_size is only used in
    // index_bits_hopper() for PTE table, i.e., depth 5+, which does not use a
    // PDE PCF or an ATS_ALLOWED/NOT_ALLOWED setting.
    UVM_ASSERT(child_index < (1ull << index_bits_hopper(dir->depth, UVM_PAGE_SIZE_AGNOSTIC)));

    // Walk up to the root, adding each level's contribution to the base VA:
    // an entry at depth d spans (1ull << va_base[d]) bytes.
    for (level = dir; level != NULL; level = level->host_parent) {
        entry_base_va += level_index * (1ull << va_base[level->depth]);
        level_index = level->index_in_parent;
    }

    // Sign-extend the accumulated VA into canonical form.
    entry_base_va = (NvU64)((NvS64)(entry_base_va << (64 - num_va_bits_hopper())) >> (64 - num_va_bits_hopper()));

    return entry_base_va < max_va_lower || entry_base_va >= min_va_upper;
}
// PDE Permission Control Flags.
//
// Return the PCF encoding for a PDE at dir->depth whose child entry is
// child_index. 'valid' selects between the valid_pcf and invalid_pcf tables;
// pde_type selects the single/dual-big/dual-small row. On ATS-enabled
// systems the ATS_ALLOWED vs ATS_NOT_ALLOWED column is chosen per the rules
// documented inline below; on non-ATS systems ATS_ALLOWED is always used.
static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir, NvU32 child_index)
{
    const NvU8 (*pcf)[2] = valid ? valid_pcf : invalid_pcf;
    NvU8 depth = dir->depth;

    UVM_ASSERT(pde_type < PDE_TYPE_COUNT);

    // Depths 0..4 are PDEs; PTE tables (depth 5+) carry no PDE PCF.
    UVM_ASSERT(depth < 5);

    // On non-ATS systems, PDE PCF only sets the valid and volatile/cache bits.
    if (!g_uvm_global.ats.enabled)
        return pcf[pde_type][ATS_ALLOWED];

    // We assume all supported ATS platforms use canonical form address.
    // See comments in uvm_gpu.c:uvm_gpu_can_address() and in
    // uvm_mmu.c:page_tree_ats_init();
    UVM_ASSERT(uvm_platform_uses_canonical_form_address());

    // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
    // ATS and GMMU page tables. For managed memory we need to prevent this
    // parallel lookup since we would not get any GPU fault if the CPU has
    // a valid mapping. Also, for external ranges that are known to be
    // mapped entirely on the GMMU page table we can skip the ATS lookup
    // for performance reasons. Parallel ATS lookup is disabled in PDE1
    // (depth 3) and, therefore, it applies to the underlying 512MB VA
    // range.
    //
    // UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
    // This is fine because CUDA ensures that all managed and external
    // allocations are properly compartmentalized in 512MB-aligned VA
    // regions. For cudaHostRegister CUDA cannot control the VA range, but
    // we rely on ATS for those allocations so they can't choose the
    // ATS_NOT_ALLOWED mode.

    // TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range to
    // PTEs.

    // HW complies with the leaf PDE's ATS_ALLOWED/ATS_NOT_ALLOWED settings,
    // enabling us to treat any upper-level PDE as a don't care as long as there
    // are leaf PDEs for the entire upper-level PDE range. We assume PDE4
    // entries (depth == 0) are always ATS enabled, and the no_ats_range is in
    // PDE3 or lower.
    if (depth == 0 || (!valid && is_ats_range_valid(dir, child_index)))
        return pcf[pde_type][ATS_ALLOWED];

    return pcf[pde_type][ATS_NOT_ALLOWED];
}
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -280,38 +386,17 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 dep
break;
}
// PCF (permission control flags) 5:3
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
// ATS and GMMU page tables. For managed memory we need to prevent this
// parallel lookup since we would not get any GPU fault if the CPU has
// a valid mapping. Also, for external ranges that are known to be
// mapped entirely on the GMMU page table we can skip the ATS lookup
// for performance reasons. Parallel ATS lookup is disabled in PDE1
// (depth 3) and, therefore, it applies to the underlying 512MB VA
// range.
//
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
// This is fine because CUDA ensures that all managed and external
// allocations are properly compartmentalized in 512MB-aligned VA
// regions. For cudaHostRegister CUDA cannot control the VA range, but
// we rely on ATS for those allocations so they can't choose the
// ATS_NOT_ALLOWED mode.
//
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
// to PTEs.
if (depth == 3 && g_uvm_global.ats.enabled)
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
else
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
// address 51:12
pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
}
// PCF (permission control flags) 5:3
pde_bits |= HWVALUE64(_MMU_VER3, PDE, PCF, pde_pcf(phys_alloc != NULL, PDE_TYPE_SINGLE, dir, child_index));
return pde_bits;
}
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -330,17 +415,20 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
break;
}
// PCF (permission control flags) 5:3
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 51:8
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
}
// PCF (permission control flags) 5:3
pde_bits |= HWVALUE64(_MMU_VER3,
DUAL_PDE,
PCF_BIG,
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_BIG, dir, child_index));
return pde_bits;
}
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -359,29 +447,40 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
break;
}
// PCF (permission control flags) 69:67 [5:3]
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 115:76 [51:12]
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
}
// PCF (permission control flags) 69:67 [5:3]
pde_bits |= HWVALUE64(_MMU_VER3,
DUAL_PDE,
PCF_SMALL,
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_SMALL, dir, child_index));
return pde_bits;
}
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_hopper(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
uvm_page_directory_t *dir,
NvU32 child_index)
{
NvU32 entry_count = entries_per_index_hopper(depth);
NvU32 entry_count;
NvU64 *entry_bits = (NvU64 *)entry;
UVM_ASSERT(dir);
entry_count = entries_per_index_hopper(dir->depth);
if (entry_count == 1) {
*entry_bits = single_pde_hopper(*phys_allocs, depth);
*entry_bits = single_pde_hopper(*phys_allocs, dir, child_index);
}
else if (entry_count == 2) {
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG], dir, child_index);
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL], dir, child_index);
// This entry applies to the whole dual PDE but is stored in the lower
// bits
// bits.
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
}
else {

View File

@ -633,6 +633,7 @@ typedef struct
NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvProcessorUuid preferredLocation; // IN
NvS32 preferredCpuNumaNode; // IN
NV_STATUS rmStatus; // OUT
} UVM_SET_PREFERRED_LOCATION_PARAMS;
@ -766,8 +767,19 @@ typedef struct
#define UVM_MIGRATE_FLAGS_ALL (UVM_MIGRATE_FLAG_ASYNC | \
UVM_MIGRATE_FLAGS_TEST_ALL)
// For pageable migrations, cpuNumaNode is used as the destination NUMA node if
// destinationUuid is the CPU.
// If NV_ERR_INVALID_ARGUMENT is returned it is because cpuMemoryNode is not
// valid and the destination processor is the CPU. cpuMemoryNode is considered
// invalid if:
// * it is less than -1,
// * it is equal to or larger than the maximum number of nodes, or
// * it corresponds to a registered GPU.
// * it is not in the node_possible_map set of nodes,
// * it does not have onlined memory
//
// For pageable migrations:
//
// In addition to the above, in the case of pageable memory, the
// cpuMemoryNode is considered invalid if it's -1.
//
// If NV_WARN_NOTHING_TO_DO is returned, user-space is responsible for
// completing the migration of the VA range described by userSpaceStart and
@ -775,6 +787,7 @@ typedef struct
//
// If NV_ERR_MORE_PROCESSING_REQUIRED is returned, user-space is responsible
// for re-trying with a different cpuNumaNode, starting at userSpaceStart.
//
#define UVM_MIGRATE UVM_IOCTL_BASE(51)
typedef struct
{
@ -784,7 +797,7 @@ typedef struct
NvU32 flags; // IN
NvU64 semaphoreAddress NV_ALIGN_BYTES(8); // IN
NvU32 semaphorePayload; // IN
NvU32 cpuNumaNode; // IN
NvS32 cpuNumaNode; // IN
NvU64 userSpaceStart NV_ALIGN_BYTES(8); // OUT
NvU64 userSpaceLength NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT

View File

@ -36,7 +36,7 @@
typedef struct
{
size_t alloc_size;
uint8_t ptr[0];
uint8_t ptr[];
} uvm_vmalloc_hdr_t;
typedef struct

View File

@ -27,7 +27,7 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 33);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
@ -62,6 +62,7 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACE_TOOLS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SEMA_POOL_TRACKER);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_SECURE_SEMAPHORE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_CTX);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_LEAF);
UVM_ENUM_STRING_DEFAULT();
}
@ -362,10 +363,7 @@ NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_
if (!bit_locks->bits)
return NV_ERR_NO_MEMORY;
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
bit_locks->lock_order = lock_order;
#endif
uvm_lock_debug_init(bit_locks, lock_order);
return NV_OK;
}

View File

@ -448,6 +448,12 @@
//
// CE semaphore payloads are encrypted, and require to take the CSL lock
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
// - CSL Context
// Order: UVM_LOCK_ORDER_CSL_CTX
// When the Confidential Computing feature is enabled, encrypt/decrypt
// operations to communicate with GPU are handled by the CSL context.
// This lock protects RM calls that use this context.
//
// - Leaf locks
// Order: UVM_LOCK_ORDER_LEAF
@ -492,6 +498,11 @@ typedef enum
UVM_LOCK_ORDER_VA_SPACE_TOOLS,
UVM_LOCK_ORDER_SEMA_POOL_TRACKER,
UVM_LOCK_ORDER_SECURE_SEMAPHORE,
// TODO: Bug 4184836: [uvm][hcc] Remove UVM_LOCK_ORDER_CSL_CTX
// This lock order can be removed after RM no longer relies on RPC event
// notifications.
UVM_LOCK_ORDER_CSL_CTX,
UVM_LOCK_ORDER_LEAF,
UVM_LOCK_ORDER_COUNT,
} uvm_lock_order_t;
@ -648,6 +659,15 @@ bool __uvm_locking_initialized(void);
#define uvm_assert_lockable_order(order) UVM_ASSERT(__uvm_check_lockable_order(order, UVM_LOCK_FLAGS_MODE_ANY))
#define uvm_assert_unlocked_order(order) UVM_ASSERT(__uvm_check_unlocked_order(order))
#if UVM_IS_DEBUG()
#define uvm_lock_debug_init(lock, order) ({ \
uvm_locking_assert_initialized(); \
(lock)->lock_order = (order); \
})
#else
#define uvm_lock_debug_init(lock, order) ((void) order)
#endif
// Helpers for locking mmap_lock (mmap_sem in kernels < 5.8)
// and recording its usage
#define uvm_assert_mmap_lock_locked_mode(mm, flags) ({ \
@ -738,15 +758,12 @@ typedef struct
#define uvm_assert_rwsem_unlocked(uvm_sem) UVM_ASSERT(!rwsem_is_locked(&(uvm_sem)->sem))
static void uvm_init_rwsem(uvm_rw_semaphore_t *uvm_sem, uvm_lock_order_t lock_order)
{
init_rwsem(&uvm_sem->sem);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
uvm_sem->lock_order = lock_order;
#endif
uvm_assert_rwsem_unlocked(uvm_sem);
}
#define uvm_init_rwsem(uvm_sem, order) ({ \
uvm_rw_semaphore_t *uvm_sem_ ## order = (uvm_sem); \
init_rwsem(&uvm_sem_ ## order->sem); \
uvm_lock_debug_init(uvm_sem, order); \
uvm_assert_rwsem_unlocked(uvm_sem); \
})
#define uvm_down_read(uvm_sem) ({ \
typeof(uvm_sem) _sem = (uvm_sem); \
@ -874,15 +891,12 @@ typedef struct
UVM_ASSERT_MSG(!irqs_disabled() && !in_interrupt(), "Mutexes cannot be used with interrupts disabled"); \
})
static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
{
mutex_init(&mutex->m);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
mutex->lock_order = lock_order;
#endif
uvm_assert_mutex_unlocked(mutex);
}
#define uvm_mutex_init(mutex, order) ({ \
uvm_mutex_t *mutex_ ## order = (mutex); \
mutex_init(&mutex_ ## order->m); \
uvm_lock_debug_init(mutex, order); \
uvm_assert_mutex_unlocked(mutex); \
})
#define uvm_mutex_lock(mutex) ({ \
typeof(mutex) _mutex = (mutex); \
@ -892,11 +906,14 @@ static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
uvm_assert_mutex_locked(_mutex); \
})
// Lock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
#define uvm_mutex_lock_no_tracking(mutex) ({ \
// Lock while already holding a lock of the same order taken with
// uvm_mutex_lock() variant. Note this shouldn't be used if the held lock was
// taken with uvm_mutex_lock_nested() because we only support a single level of
// nesting. This should be extremely rare and *_nested helpers will only be
// added as needed.
#define uvm_mutex_lock_nested(mutex) ({ \
uvm_assert_mutex_interrupts(); \
mutex_lock(&(mutex)->m); \
mutex_lock_nested(&(mutex)->m, 1); \
})
#define uvm_mutex_trylock(mutex) ({ \
@ -926,9 +943,8 @@ static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
uvm_record_unlock_out_of_order(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
})
// Unlock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
#define uvm_mutex_unlock_no_tracking(mutex) ({ \
// Unlock w/o any tracking.
#define uvm_mutex_unlock_nested(mutex) ({ \
uvm_assert_mutex_interrupts(); \
mutex_unlock(&(mutex)->m); \
})
@ -941,14 +957,11 @@ typedef struct
#endif
} uvm_semaphore_t;
static void uvm_sema_init(uvm_semaphore_t *semaphore, int val, uvm_lock_order_t lock_order)
{
sema_init(&semaphore->sem, val);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
semaphore->lock_order = lock_order;
#endif
}
#define uvm_sema_init(semaphore, val, order) ({ \
uvm_semaphore_t *sem_ ## order = (semaphore); \
sema_init(&sem_ ## order->sem, (val)); \
uvm_lock_debug_init(semaphore, order); \
})
#define uvm_sem_is_locked(uvm_sem) uvm_check_locked(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
@ -1012,15 +1025,12 @@ typedef struct
#define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))
static void uvm_spin_lock_init(uvm_spinlock_t *spinlock, uvm_lock_order_t lock_order)
{
spin_lock_init(&spinlock->lock);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
spinlock->lock_order = lock_order;
#endif
uvm_assert_spinlock_unlocked(spinlock);
}
#define uvm_spin_lock_init(spinlock, order) ({ \
uvm_spinlock_t *spinlock_ ## order = (spinlock); \
spin_lock_init(&spinlock_ ## order->lock); \
uvm_lock_debug_init(spinlock, order); \
uvm_assert_spinlock_unlocked(spinlock); \
})
#define uvm_spin_lock(uvm_lock) ({ \
typeof(uvm_lock) _lock = (uvm_lock); \
@ -1036,15 +1046,12 @@ static void uvm_spin_lock_init(uvm_spinlock_t *spinlock, uvm_lock_order_t lock_o
uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
})
static void uvm_spin_lock_irqsave_init(uvm_spinlock_irqsave_t *spinlock, uvm_lock_order_t lock_order)
{
spin_lock_init(&spinlock->lock);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
spinlock->lock_order = lock_order;
#endif
uvm_assert_spinlock_unlocked(spinlock);
}
#define uvm_spin_lock_irqsave_init(spinlock, order) ({ \
uvm_spinlock_irqsave_t *spinlock_ ## order = (spinlock); \
spin_lock_init(&spinlock_ ## order->lock); \
uvm_lock_debug_init(spinlock, order); \
uvm_assert_spinlock_unlocked(spinlock); \
})
// Use a temp to not rely on flags being written after acquiring the lock.
#define uvm_spin_lock_irqsave(uvm_lock) ({ \
@ -1119,16 +1126,12 @@ static void uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t *rwlock)
#define uvm_assert_rwlock_unlocked(uvm_rwlock)
#endif
static void uvm_rwlock_irqsave_init(uvm_rwlock_irqsave_t *rwlock, uvm_lock_order_t lock_order)
{
rwlock_init(&rwlock->lock);
#if UVM_IS_DEBUG()
uvm_locking_assert_initialized();
rwlock->lock_order = lock_order;
atomic_set(&rwlock->lock_count, 0);
#endif
uvm_assert_rwlock_unlocked(rwlock);
}
#define uvm_rwlock_irqsave_init(rwlock, order) ({ \
uvm_rwlock_irqsave_t *rwlock_ ## order = rwlock; \
rwlock_init(&rwlock_ ## order->lock); \
uvm_lock_debug_init(rwlock, order); \
uvm_assert_rwlock_unlocked(rwlock); \
})
// We can't store the irq_flags within the lock itself for readers, so they must
// pass in their flags.

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -633,10 +633,17 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *owning_gpu;
uvm_gpu_t *owning_gpu = NULL;
uvm_gpu_t *gpu;
if (mem_info->egm)
UVM_ASSERT(mem_info->sysmem);
// !mem_info->deviceDescendant && !mem_info->sysmem imply fabric allocation.
// !mem_info->deviceDescendant also means that mem_info->uuid is invalid. In
// this case the owning GPU is NULL, meaning that UVM is oblivious to the
// topology and relies on RM and/or the fabric manager (FM) for memory
// lifetime management and GPU ref counting.
if (!mem_info->deviceDescendant && !mem_info->sysmem) {
ext_gpu_map->owning_gpu = NULL;
ext_gpu_map->is_sysmem = false;
@ -645,7 +652,17 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// This is a local or peer allocation, so the owning GPU must have been
// registered.
// This also checks for if EGM owning GPU is registered.
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
owning_gpu = gpu;
break;
}
}
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@ -1343,7 +1360,9 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
{
uvm_va_range_t *va_range;
NV_STATUS status = NV_OK;
uvm_global_processor_mask_t retained_mask;
// TODO: Bug 4351121: retained_mask should be pre-allocated, not on the
// stack.
uvm_processor_mask_t retained_mask;
LIST_HEAD(deferred_free_list);
if (uvm_api_range_invalid_4k(base, length))
@ -1379,14 +1398,14 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
// External ranges may have deferred free work, so the GPUs may have to
// be retained. Construct the mask of all the GPUs that need to be
// retained.
uvm_va_space_global_gpus_in_mask(va_space, &retained_mask, &va_range->external.mapped_gpus);
uvm_processor_mask_and(&retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
}
uvm_va_range_destroy(va_range, &deferred_free_list);
// If there is deferred work, retain the required GPUs.
if (!list_empty(&deferred_free_list))
uvm_global_mask_retain(&retained_mask);
uvm_global_gpu_retain(&retained_mask);
out:
uvm_va_space_up_write(va_space);
@ -1394,7 +1413,7 @@ out:
if (!list_empty(&deferred_free_list)) {
UVM_ASSERT(status == NV_OK);
uvm_deferred_free_object_list(&deferred_free_list);
uvm_global_mask_release(&retained_mask);
uvm_global_gpu_release(&retained_mask);
}
return status;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -60,6 +60,8 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;
@ -71,4 +73,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
parent_gpu->no_ats_range_required = false;
}

View File

@ -26,39 +26,53 @@
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"enable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"disable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_access_counter_notifications is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"clear_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_size is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "access_counter_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}

View File

@ -186,7 +186,7 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void)
}
// Noop, since COPY_TYPE doesn't exist in Maxwell.
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
return 0;
}
@ -212,7 +212,7 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
copy_type_value = gpu->parent->ce_hal->memcopy_copy_type(push, dst, src);
copy_type_value = gpu->parent->ce_hal->memcopy_copy_type(dst, src);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);

View File

@ -26,34 +26,46 @@
void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "enable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"enable_replayable_faults is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "disable_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"disable_replayable_faults is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
UVM_ASSERT_MSG(false, "clear_replayable_faults is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"clear_replayable_faults is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_put is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_read_put is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return 0;
}
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_read_get is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_read_get is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return 0;
}
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_write_get is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_write_get is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
@ -72,24 +84,32 @@ NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_pa
NvU32 index,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_entry is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_parse_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return NV_ERR_NOT_SUPPORTED;
}
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_is_valid is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_entry_is_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return false;
}
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_clear_valid is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_entry_clear_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT_MSG(false, "fault_buffer_entry_size is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_entry_size is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
return 0;
}
@ -97,6 +117,8 @@ void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_par
void *fault_packet,
uvm_fault_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false, "fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n", parent_gpu->name);
UVM_ASSERT_MSG(false,
"fault_buffer_parse_non_replayable_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
}

Some files were not shown because too many files have changed in this diff Show More