535.129.03

2023-10-31 14:22:16 +01:00 · 2023-10-31 14:22:16 +01:00 · e573018659
parent f59818b751
commit e573018659
163 changed files with 86017 additions and 84520 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,8 @@

 ## Release 535 Entries

+### [535.129.03] 2023-10-31
+
 ### [535.113.01] 2023-09-21

 #### Fixed
@ -50,6 +52,10 @@

 ## Release 525 Entries

+#### Fixed
+
+- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
+
 ### [525.116.04] 2023-05-09

 ### [525.116.03] 2023-04-25
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source

 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 535.113.01.
+version 535.129.03.


 ## How to Build
@ -17,7 +17,7 @@ as root:

 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-535.113.01 driver release.  This can be achieved by installing
+535.129.03 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,

@ -180,7 +180,7 @@ software applications.
 ## Compatible GPUs

 The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 535.113.01 release,
+(see the table below). However, in the 535.129.03 release,
 GeForce and Workstation support is still considered alpha-quality.

 To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
 parameter to 1. For more details, see the NVIDIA GPU driver end user
 README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/535.113.01/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/535.129.03/README/kernel_open.html

 In the below table, if three IDs are listed, the first is the PCI Device 
 ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -658,6 +658,7 @@ Subsystem Device ID.
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 147F |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1622 |
 | NVIDIA A100-SXM4-80GB                           | 20B2 10DE 1623 |
+| NVIDIA PG509-210                                | 20B2 10DE 1625 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A7 |
 | NVIDIA A100-SXM-64GB                            | 20B3 10DE 14A8 |
 | NVIDIA A100 80GB PCIe                           | 20B5 10DE 1533 |
@ -665,6 +666,7 @@ Subsystem Device ID.
 | NVIDIA PG506-232                                | 20B6 10DE 1492 |
 | NVIDIA A30                                      | 20B7 10DE 1532 |
 | NVIDIA A30                                      | 20B7 10DE 1804 |
+| NVIDIA A30                                      | 20B7 10DE 1852 |
 | NVIDIA A800-SXM4-40GB                           | 20BD 10DE 17F4 |
 | NVIDIA A100-PCIE-40GB                           | 20F1 10DE 145F |
 | NVIDIA A800-SXM4-80GB                           | 20F3 10DE 179B |
@ -748,6 +750,8 @@ Subsystem Device ID.
 | NVIDIA H100 PCIe                                | 2331 10DE 1626 |
 | NVIDIA H100                                     | 2339 10DE 17FC |
 | NVIDIA H800 NVL                                 | 233A 10DE 183A |
+| GH200 120GB                                     | 2342 10DE 16EB |
+| GH200 480GB                                     | 2342 10DE 1809 |
 | NVIDIA GeForce RTX 3060 Ti                      | 2414           |
 | NVIDIA GeForce RTX 3080 Ti Laptop GPU           | 2420           |
 | NVIDIA RTX A5500 Laptop GPU                     | 2438           |
--- a/kernel-open/Kbuild
+++ b/kernel-open/Kbuild
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.113.01\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.129.03\"

 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@ -123,6 +123,9 @@ ifneq ($(wildcard /proc/sgi_uv),)
 EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
 endif

+ifdef VGX_FORCE_VFIO_PCI_CORE
+ EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
+endif

 #
 # The conftest.sh script tests various aspects of the target kernel.
--- a/kernel-open/conftest.sh
+++ b/kernel-open/conftest.sh
@ -4468,6 +4468,24 @@ compile_test() {
            compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE" "" "types"
        ;;

+        mmu_notifier_ops_arch_invalidate_secondary_tlbs)
+            #
+            # Determine if the mmu_notifier_ops struct has the
+            # 'arch_invalidate_secondary_tlbs' member.
+            #
+            # struct mmu_notifier_ops.invalidate_range was renamed to
+            # arch_invalidate_secondary_tlbs by commit 1af5a8109904
+            # ("mmu_notifiers: rename invalidate_range notifier") due to be
+            # added in v6.6
+           CODE="
+            #include <linux/mmu_notifier.h>
+            int conftest_mmu_notifier_ops_arch_invalidate_secondary_tlbs(void) {
+                return offsetof(struct mmu_notifier_ops, arch_invalidate_secondary_tlbs);
+            }"
+
+            compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
+        ;;
+
        drm_format_num_planes)
            #
            # Determine if drm_format_num_planes() function is present.
@ -6681,18 +6699,9 @@ case "$5" in
                VFIO_PCI_CORE_PRESENT=1
            fi

-            # When this sanity check is run via nvidia-installer, it sets ARCH as aarch64.
-            # But, when it is run via Kbuild, ARCH is set as arm64
-            if [ "$ARCH" = "aarch64" ]; then
-                ARCH="arm64"
-            fi
-
            if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
-
-                # On x86_64, vGPU requires MDEV framework to be present.
-                # On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
-                if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
-                   ([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
+                # vGPU requires either MDEV or vfio-pci-core framework to be present.
+                if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
                    exit 0
                fi
            fi
@ -6703,14 +6712,10 @@ case "$5" in
                echo "CONFIG_VFIO_IOMMU_TYPE1";
            fi

-            if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
+            if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
                echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
            fi

-            if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
-                echo "CONFIG_VFIO_MDEV";
-            fi
-
            if [ "$KVM_PRESENT" = "0" ]; then
                echo "CONFIG_KVM";
            fi
--- a/kernel-open/nvidia-peermem/nvidia-peermem.c
+++ b/kernel-open/nvidia-peermem/nvidia-peermem.c
@ -53,7 +53,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
 module_param(peerdirect_support, int, S_IRUGO);
 MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");

-#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
+
+#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
+#ifdef NV_MEM_DEBUG
+#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
+#else
+#define peer_trace(FMT, ARGS...) do {} while (0)
+#endif

 #if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)

@ -74,7 +80,10 @@ invalidate_peer_memory mem_invalidate_callback;
 static void *reg_handle = NULL;
 static void *reg_handle_nc = NULL;

+#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)
+
 struct nv_mem_context {
+    u64 pad1;
    struct nvidia_p2p_page_table *page_table;
    struct nvidia_p2p_dma_mapping *dma_mapping;
    u64 core_context;
@ -86,8 +95,22 @@ struct nv_mem_context {
    struct task_struct *callback_task;
    int sg_allocated;
    struct sg_table sg_head;
+    u64 pad2;
 };

+#define NV_MEM_CONTEXT_CHECK_OK(MC) ({                                  \
+    struct nv_mem_context *mc = (MC);                                   \
+    int rc = ((0 != mc) &&                                              \
+              (READ_ONCE(mc->pad1) == NV_MEM_CONTEXT_MAGIC) &&          \
+              (READ_ONCE(mc->pad2) == NV_MEM_CONTEXT_MAGIC));           \
+    if (!rc) {                                                          \
+        peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n", \
+                   mc,                                                  \
+                   mc?mc->pad1:0,                                       \
+                   mc?mc->pad2:0);                                      \
+    }                                                                   \
+    rc;                                                                 \
+})

 static void nv_get_p2p_free_callback(void *data)
 {
@ -97,8 +120,9 @@ static void nv_get_p2p_free_callback(void *data)
    struct nvidia_p2p_dma_mapping *dma_mapping = NULL;

    __module_get(THIS_MODULE);
-    if (!nv_mem_context) {
-        peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
+
+    if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
+        peer_err("detected invalid context, skipping further processing\n");
        goto out;
    }

@ -169,9 +193,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
        /* Error case handled as not mine */
        return 0;

+    nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
    nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
    nv_mem_context->page_virt_end   = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
    nv_mem_context->mapped_size  = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
+    nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;

    ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
                               &nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
@ -195,6 +221,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
    return 1;

 err:
+    memset(nv_mem_context, 0, sizeof(*nv_mem_context));
    kfree(nv_mem_context);

    /* Error case handled as not mine */
@ -342,6 +369,7 @@ static void nv_mem_release(void *context)
        sg_free_table(&nv_mem_context->sg_head);
        nv_mem_context->sg_allocated = 0;
    }
+    memset(nv_mem_context, 0, sizeof(*nv_mem_context));
    kfree(nv_mem_context);
    module_put(THIS_MODULE);
    return;
--- a/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
+++ b/kernel-open/nvidia-uvm/nvidia-uvm.Kbuild
@ -99,6 +99,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
 NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
 NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
+NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
 NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
 NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
 NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
--- a/kernel-open/nvidia-uvm/uvm.c
+++ b/kernel-open/nvidia-uvm/uvm.c
@ -571,7 +571,6 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
 static void uvm_vm_close_managed(struct vm_area_struct *vma)
 {
    uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
-    uvm_processor_id_t gpu_id;
    bool make_zombie = false;

    if (current->mm != NULL)
@ -606,12 +605,6 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)

    uvm_destroy_vma_managed(vma, make_zombie);

-    // Notify GPU address spaces that the fault buffer needs to be flushed to
-    // avoid finding stale entries that can be attributed to new VA ranges
-    // reallocated at the same address.
-    for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
-        uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
-    }
    uvm_va_space_up_write(va_space);

    if (current->mm != NULL)
--- a/kernel-open/nvidia-uvm/uvm_ada.c
+++ b/kernel-open/nvidia-uvm/uvm_ada.c
@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2021 NVIDIA Corporation
+    Copyright (c) 2021-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -94,4 +94,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->map_remap_larger_page_promotion = false;

    parent_gpu->plc_supported = true;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_ampere.c
+++ b/kernel-open/nvidia-uvm/uvm_ampere.c
@ -101,4 +101,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
        parent_gpu->map_remap_larger_page_promotion = false;

    parent_gpu->plc_supported = true;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_ats_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.c
@ -107,10 +107,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
    return status;
 }

-static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
-                                   NvU64 addr,
-                                   size_t size,
-                                   uvm_fault_client_type_t client_type)
+static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
+                                NvU64 addr,
+                                size_t size,
+                                uvm_fault_client_type_t client_type)
 {
    uvm_ats_fault_invalidate_t *ats_invalidate;

@ -119,12 +119,12 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
    else
        ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;

-    if (!ats_invalidate->write_faults_in_batch) {
-        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
-        ats_invalidate->write_faults_in_batch = true;
+    if (!ats_invalidate->tlb_batch_pending) {
+        uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
+        ats_invalidate->tlb_batch_pending = true;
    }

-    uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
+    uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
 }

 static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
@ -497,6 +497,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,

        if (vma->vm_flags & VM_WRITE) {
            uvm_page_mask_region_fill(faults_serviced_mask, subregion);
+            uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);

            // The Linux kernel never invalidates TLB entries on mapping
            // permission upgrade. This is a problem if the GPU has cached
@ -507,7 +508,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
            // infinite loop because we just forward the fault to the Linux
            // kernel and it will see that the permissions in the page table are
            // correct. Therefore, we flush TLB entries on ATS write faults.
-            flush_tlb_write_faults(gpu_va_space, start, length, client_type);
+            flush_tlb_va_region(gpu_va_space, start, length, client_type);
        }
        else {
            uvm_page_mask_region_fill(reads_serviced_mask, subregion);
@ -530,6 +531,15 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
            return status;

        uvm_page_mask_region_fill(faults_serviced_mask, subregion);
+
+        // Similarly to permission upgrade scenario, discussed above, GPU
+        // will not re-fetch the entry if the PTE is invalid and page size
+        // is 4K. To avoid infinite faulting loop, invalidate TLB for every
+        // new translation written explicitly like in the case of permission
+        // upgrade.
+        if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
+            flush_tlb_va_region(gpu_va_space, start, length, client_type);
+
    }

    return status;
@ -564,7 +574,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
    NV_STATUS status;
    uvm_push_t push;

-    if (!ats_invalidate->write_faults_in_batch)
+    if (!ats_invalidate->tlb_batch_pending)
        return NV_OK;

    UVM_ASSERT(gpu_va_space);
@ -576,7 +586,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
                            "Invalidate ATS entries");

    if (status == NV_OK) {
-        uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
+        uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
        uvm_push_end(&push);

        // Add this push to the GPU's tracker so that fault replays/clears can
@ -584,8 +594,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
        status = uvm_tracker_add_push_safe(out_tracker, &push);
    }

-    ats_invalidate->write_faults_in_batch = false;
+    ats_invalidate->tlb_batch_pending = false;

    return status;
 }
-
--- a/kernel-open/nvidia-uvm/uvm_ats_faults.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_faults.h
@ -52,7 +52,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
 bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);

 // This function performs pending TLB invalidations for ATS and clears the
-// ats_invalidate->write_faults_in_batch flag
+// ats_invalidate->tlb_batch_pending flag
 NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
                                  uvm_ats_fault_invalidate_t *ats_invalidate,
                                  uvm_tracker_t *out_tracker);
--- a/kernel-open/nvidia-uvm/uvm_ats_sva.c
+++ b/kernel-open/nvidia-uvm/uvm_ats_sva.c
@ -29,8 +29,13 @@
 #include "uvm_va_space.h"
 #include "uvm_va_space_mm.h"

+#include <asm/io.h>
+#include <linux/log2.h>
 #include <linux/iommu.h>
 #include <linux/mm_types.h>
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/mmu_context.h>

 // linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
 // reference required for the iommu_sva_bind_device() call. This header is not
@ -46,17 +51,276 @@
 #define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
 #endif

+// Type to represent a 128-bit SMMU command queue command.
+struct smmu_cmd {
+    NvU64 low;
+    NvU64 high;
+};
+
+// Base address of SMMU CMDQ-V for GSMMU0.
+#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
+#define SMMU_CMDQV_BASE_LEN 0x00830000
+
+// CMDQV configuration is done by firmware but we check status here.
+#define SMMU_CMDQV_CONFIG 0x0
+#define SMMU_CMDQV_CONFIG_CMDQV_EN BIT(0)
+
+// Used to map a particular VCMDQ to a VINTF.
+#define SMMU_CMDQV_CMDQ_ALLOC_MAP(vcmdq_id) (0x200 + 0x4 * (vcmdq_id))
+#define SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC BIT(0)
+
+// Shift for the field containing the index of the virtual interface
+// owning the VCMDQ.
+#define SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT 15
+
+// Base address for the VINTF registers.
+#define SMMU_VINTF_BASE_ADDR(cmdqv_base_addr, vintf_id) (cmdqv_base_addr + 0x1000 + 0x100 * (vintf_id))
+
+// Virtual interface (VINTF) configuration registers. The WAR only
+// works on baremetal so we need to configure ourselves as the
+// hypervisor owner.
+#define SMMU_VINTF_CONFIG 0x0
+#define SMMU_VINTF_CONFIG_ENABLE BIT(0)
+#define SMMU_VINTF_CONFIG_HYP_OWN BIT(17)
+
+#define SMMU_VINTF_STATUS 0x0
+#define SMMU_VINTF_STATUS_ENABLED BIT(0)
+
+// Caclulates the base address for a particular VCMDQ instance.
+#define SMMU_VCMDQ_BASE_ADDR(cmdqv_base_addr, vcmdq_id) (cmdqv_base_addr + 0x10000 + 0x80 * (vcmdq_id))
+
+// SMMU command queue consumer index register. Updated by SMMU
+// when commands are consumed.
+#define SMMU_VCMDQ_CONS 0x0
+
+// SMMU command queue producer index register. Updated by UVM when
+// commands are added to the queue.
+#define SMMU_VCMDQ_PROD 0x4
+
+// Configuration register used to enable a VCMDQ.
+#define SMMU_VCMDQ_CONFIG 0x8
+#define SMMU_VCMDQ_CONFIG_ENABLE BIT(0)
+
+// Status register used to check the VCMDQ is enabled.
+#define SMMU_VCMDQ_STATUS 0xc
+#define SMMU_VCMDQ_STATUS_ENABLED BIT(0)
+
+// Base address offset for the VCMDQ registers.
+#define SMMU_VCMDQ_CMDQ_BASE 0x10000
+
+// Size of the command queue. Each command is 16 bytes and we can't
+// have a command queue greater than one page in size.
+#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
+#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)
+
+// We always use VINTF63 for the WAR
+#define VINTF 63
+static void smmu_vintf_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
+{
+    iowrite32(val, SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
+}
+
+static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
+{
+    return ioread32(SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
+}
+
+// We always use VCMDQ127 for the WAR
+#define VCMDQ 127
+void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
+{
+    iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
+}
+
+NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
+{
+    return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
+}
+
+static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
+{
+    iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
+}
+
+// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
+// TLB invalidates on read-only to read-write upgrades
+static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
+{
+    uvm_spin_loop_t spin;
+    NV_STATUS status;
+    unsigned long cmdqv_config;
+    void __iomem *smmu_cmdqv_base;
+    struct acpi_iort_node *node;
+    struct acpi_iort_smmu_v3 *iort_smmu;
+
+    node = *(struct acpi_iort_node **) dev_get_platdata(parent_gpu->pci_dev->dev.iommu->iommu_dev->dev->parent);
+    iort_smmu = (struct acpi_iort_smmu_v3 *) node->node_data;
+
+    smmu_cmdqv_base = ioremap(SMMU_CMDQV_BASE_ADDR(iort_smmu->base_address), SMMU_CMDQV_BASE_LEN);
+    if (!smmu_cmdqv_base)
+        return NV_ERR_NO_MEMORY;
+
+    parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
+    cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
+    if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
+        status = NV_ERR_OBJECT_NOT_FOUND;
+        goto out;
+    }
+
+    // Allocate SMMU CMDQ pages for WAR
+    parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
+    if (!parent_gpu->smmu_war.smmu_cmdq) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    // Initialise VINTF for the WAR
+    smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, SMMU_VINTF_CONFIG_ENABLE | SMMU_VINTF_CONFIG_HYP_OWN);
+    UVM_SPIN_WHILE(!(smmu_vintf_read32(smmu_cmdqv_base, SMMU_VINTF_STATUS) & SMMU_VINTF_STATUS_ENABLED), &spin);
+
+    // Allocate VCMDQ to VINTF
+    iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
+              smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
+
+    smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
+                       page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
+    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
+    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
+    smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
+    UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);
+
+    uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
+    parent_gpu->smmu_war.smmu_prod = 0;
+    parent_gpu->smmu_war.smmu_cons = 0;
+
+    return NV_OK;
+
+out:
+    iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
+    parent_gpu->smmu_war.smmu_cmdqv_base = NULL;
+
+    return status;
+}
+
+static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
+{
+    void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
+    NvU32 cmdq_alloc_map;
+
+    if (parent_gpu->smmu_war.smmu_cmdqv_base) {
+        smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
+        cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
+        iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
+        smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
+    }
+
+    if (parent_gpu->smmu_war.smmu_cmdq)
+        __free_page(parent_gpu->smmu_war.smmu_cmdq);
+
+    if (parent_gpu->smmu_war.smmu_cmdqv_base)
+        iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
+}
+
+// The SMMU on ARM64 can run under different translation regimes depending on
+// what features the OS and CPU variant support. The CPU for GH180 supports
+// virtualisation extensions and starts the kernel at EL2 meaning SMMU operates
+// under the NS-EL2-E2H translation regime. Therefore we need to use the
+// TLBI_EL2_* commands which invalidate TLB entries created under this
+// translation regime.
+#define CMDQ_OP_TLBI_EL2_ASID 0x21;
+#define CMDQ_OP_TLBI_EL2_VA 0x22;
+#define CMDQ_OP_CMD_SYNC 0x46
+
+// Use the same maximum as used for MAX_TLBI_OPS in the upstream
+// kernel.
+#define UVM_MAX_TLBI_OPS (1UL << (PAGE_SHIFT - 3))
+
+#if UVM_ATS_SMMU_WAR_REQUIRED()
+void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
+{
+    struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
+    uvm_parent_gpu_t *parent_gpu = gpu_va_space->gpu->parent;
+    struct {
+        NvU64 low;
+        NvU64 high;
+    } *vcmdq;
+    unsigned long vcmdq_prod;
+    NvU64 end;
+    uvm_spin_loop_t spin;
+    NvU16 asid;
+
+    if (!parent_gpu->smmu_war.smmu_cmdqv_base)
+        return;
+
+    asid = arm64_mm_context_get(mm);
+    vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
+    uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
+    vcmdq_prod = parent_gpu->smmu_war.smmu_prod;
+
+    // Our queue management is very simple. The mutex prevents multiple
+    // producers writing to the queue and all our commands require waiting for
+    // the queue to drain so we know it's empty. If we can't fit enough commands
+    // in the queue we just invalidate the whole ASID.
+    //
+    // The command queue is a cirular buffer with the MSB representing a wrap
+    // bit that must toggle on each wrap. See the SMMU architecture
+    // specification for more details.
+    //
+    // SMMU_VCMDQ_CMDQ_ENTRIES - 1 because we need to leave space for the
+    // CMD_SYNC.
+    if ((size >> PAGE_SHIFT) > min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)) {
+        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_ASID;
+        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
+        vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0;
+        vcmdq_prod++;
+    }
+    else {
+        for (end = addr + size; addr < end; addr += PAGE_SIZE) {
+            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_VA;
+            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
+            vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = addr & ~((1UL << 12) - 1);
+            vcmdq_prod++;
+        }
+    }
+
+    vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_CMD_SYNC;
+    vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0x0;
+    vcmdq_prod++;
+
+    // MSB is the wrap bit
+    vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
+    parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
+    smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);
+
+    UVM_SPIN_WHILE(
+        (smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
+        &spin);
+
+    uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
+    kunmap(parent_gpu->smmu_war.smmu_cmdq);
+    arm64_mm_context_put(mm);
+}
+#endif
+
 NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
 {
    int ret;

    ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
+    if (ret)
+        return errno_to_nv_status(ret);

-    return errno_to_nv_status(ret);
+    if (UVM_ATS_SMMU_WAR_REQUIRED())
+        return uvm_ats_smmu_war_init(parent_gpu);
+    else
+        return NV_OK;
 }

 void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
 {
+    if (UVM_ATS_SMMU_WAR_REQUIRED())
+        uvm_ats_smmu_war_deinit(parent_gpu);
+
    iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
 }

--- a/kernel-open/nvidia-uvm/uvm_ats_sva.h
+++ b/kernel-open/nvidia-uvm/uvm_ats_sva.h
@ -53,6 +53,17 @@
        #define UVM_ATS_SVA_SUPPORTED() 0
    #endif

+// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
+// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
+// kernel not issuing SMMU TLB invalidates on read-only
+#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
+    #define UVM_ATS_SMMU_WAR_REQUIRED() 0
+#elif NVCPU_IS_AARCH64
+    #define UVM_ATS_SMMU_WAR_REQUIRED() 1
+#else
+    #define UVM_ATS_SMMU_WAR_REQUIRED() 0
+#endif
+
 typedef struct
 {
    int placeholder;
@ -81,6 +92,17 @@ typedef struct

    // LOCKING: None
    void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
+
+    // Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
+    // TLB invalidates on read-only to read-write upgrades
+    #if UVM_ATS_SMMU_WAR_REQUIRED()
+        void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size);
+    #else
+        static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
+        {
+
+        }
+    #endif
 #else
    static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
    {
@ -111,6 +133,11 @@ typedef struct
    {

    }
+
+    static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
+    {
+
+    }
 #endif // UVM_ATS_SVA_SUPPORTED

 #endif // __UVM_ATS_SVA_H__
--- a/kernel-open/nvidia-uvm/uvm_common.h
+++ b/kernel-open/nvidia-uvm/uvm_common.h
@ -21,8 +21,8 @@

 *******************************************************************************/

-#ifndef _UVM_COMMON_H
-#define _UVM_COMMON_H
+#ifndef __UVM_COMMON_H__
+#define __UVM_COMMON_H__

 #ifdef DEBUG
    #define UVM_IS_DEBUG() 1
@ -413,4 +413,40 @@ static inline void uvm_touch_page(struct page *page)
 // Return true if the VMA is one used by UVM managed allocations.
 bool uvm_vma_is_managed(struct vm_area_struct *vma);

-#endif /* _UVM_COMMON_H */
+static bool uvm_platform_uses_canonical_form_address(void)
+{
+    if (NVCPU_IS_PPC64LE)
+        return false;
+
+    return true;
+}
+
+// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
+static NvU32 uvm_cpu_num_va_bits(void)
+{
+    return fls64(TASK_SIZE - 1) + 1;
+}
+
+// Return the unaddressable range in a num_va_bits-wide VA space, [first, outer)
+static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *outer)
+{
+    UVM_ASSERT(num_va_bits < 64);
+    UVM_ASSERT(first);
+    UVM_ASSERT(outer);
+
+    if (uvm_platform_uses_canonical_form_address()) {
+        *first = 1ULL << (num_va_bits - 1);
+        *outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
+    }
+    else {
+        *first = 1ULL << num_va_bits;
+        *outer = ~0Ull;
+    }
+}
+
+static void uvm_cpu_get_unaddressable_range(NvU64 *first, NvU64 *outer)
+{
+    return uvm_get_unaddressable_range(uvm_cpu_num_va_bits(), first, outer);
+}
+
+#endif /* __UVM_COMMON_H__ */
--- a/kernel-open/nvidia-uvm/uvm_gpu.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu.c
@ -218,19 +218,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
    return parent_gpu->rm_info.subdeviceCount == 1;
 }

-static bool platform_uses_canonical_form_address(void)
-{
-    if (NVCPU_IS_PPC64LE)
-        return false;
-
-    return true;
-}
-
 bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
 {
    // Lower and upper address spaces are typically found in platforms that use
    // the canonical address form.
    NvU64 max_va_lower;
+    NvU64 min_va_upper;
    NvU64 addr_end = addr + size - 1;
    NvU8 gpu_addr_shift;
    NvU8 cpu_addr_shift;
@ -243,7 +236,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    UVM_ASSERT(size > 0);

    gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
-    cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
+    cpu_addr_shift = uvm_cpu_num_va_bits();
    addr_shift = gpu_addr_shift;

    // Pascal+ GPUs are capable of accessing kernel pointers in various modes
@ -279,9 +272,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
    //               0 +----------------+               0 +----------------+

    // On canonical form address platforms and Pascal+ GPUs.
-    if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
-        NvU64 min_va_upper;
-
+    if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
        // On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
        // 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
        // wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
@ -292,15 +283,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
            addr_shift = gpu_addr_shift;
        else
            addr_shift = cpu_addr_shift;
+    }

-        min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
-        max_va_lower = 1ULL << (addr_shift - 1);
-        return (addr_end < max_va_lower) || (addr >= min_va_upper);
-    }
-    else {
-        max_va_lower = 1ULL << addr_shift;
-        return addr_end < max_va_lower;
-    }
+    uvm_get_unaddressable_range(addr_shift, &max_va_lower, &min_va_upper);
+
+    return (addr_end < max_va_lower) || (addr >= min_va_upper);
 }

 // The internal UVM VAS does not use canonical form addresses.
@ -326,14 +313,14 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
    NvU8 addr_shift;
    NvU64 input_addr = addr;

-    if (platform_uses_canonical_form_address()) {
+    if (uvm_platform_uses_canonical_form_address()) {
        // When the CPU VA width is larger than GPU's, it means that:
        // On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
        // On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
        // We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
        // behavior of CPUs with smaller (than GPU) VA widths.
        gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
-        cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
+        cpu_addr_shift = uvm_cpu_num_va_bits();

        if (cpu_addr_shift > gpu_addr_shift)
            addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
--- a/kernel-open/nvidia-uvm/uvm_gpu.h
+++ b/kernel-open/nvidia-uvm/uvm_gpu.h
@ -57,14 +57,16 @@

 typedef struct
 {
-    // Number of faults from this uTLB that have been fetched but have not been serviced yet
+    // Number of faults from this uTLB that have been fetched but have not been
+    // serviced yet.
    NvU32 num_pending_faults;

    // Whether the uTLB contains fatal faults
    bool has_fatal_faults;

-    // We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
-    // the uTLB in lockdown mode and no new translations are accepted
+    // We have issued a replay of type START_ACK_ALL while containing fatal
+    // faults. This puts the uTLB in lockdown mode and no new translations are
+    // accepted.
    bool in_lockdown;

    // We have issued a cancel on this uTLB
@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
        struct list_head service_context_list;

        // A mask of GPUs that need to be checked for ECC errors before the CPU
-        // fault handler returns, but after the VA space lock has been unlocked to
-        // avoid the RM/UVM VA space lock deadlocks.
+        // fault handler returns, but after the VA space lock has been unlocked
+        // to avoid the RM/UVM VA space lock deadlocks.
        uvm_processor_mask_t gpus_to_check_for_ecc;

        // This is set to throttle page fault thrashing.
@ -160,9 +162,9 @@ struct uvm_service_block_context_struct

    struct
    {
-        // Per-processor mask with the pages that will be resident after servicing.
-        // We need one mask per processor because we may coalesce faults that
-        // trigger migrations to different processors.
+        // Per-processor mask with the pages that will be resident after
+        // servicing. We need one mask per processor because we may coalesce
+        // faults that trigger migrations to different processors.
        uvm_page_mask_t new_residency;
    } per_processor_masks[UVM_ID_MAX_PROCESSORS];

@ -263,7 +265,10 @@ struct uvm_fault_service_batch_context_struct

    NvU32 num_coalesced_faults;

-    bool has_fatal_faults;
+    // One of the VA spaces in this batch which had fatal faults. If NULL, no
+    // faults were fatal. More than one VA space could have fatal faults, but we
+    // pick one to be the target of the cancel sequence.
+    uvm_va_space_t *fatal_va_space;

    bool has_throttled_faults;

@ -291,11 +296,8 @@ struct uvm_fault_service_batch_context_struct

 struct uvm_ats_fault_invalidate_struct
 {
-    // Whether the TLB batch contains any information
-    bool            write_faults_in_batch;
-
-    // Batch of TLB entries to be invalidated
-    uvm_tlb_batch_t write_faults_tlb_batch;
+    bool            tlb_batch_pending;
+    uvm_tlb_batch_t tlb_batch;
 };

 typedef struct
@ -440,20 +442,9 @@ struct uvm_access_counter_service_batch_context_struct
        NvU32                             num_notifications;

        // Boolean used to avoid sorting the fault batch by instance_ptr if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same instance_ptr
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same instance_ptr
        bool is_single_instance_ptr;
-
-        // Scratch space, used to generate artificial physically addressed notifications.
-        // Virtual address notifications are always aligned to 64k. This means up to 16
-        // different physical locations could have been accessed to trigger one notification.
-        // The sub-granularity mask can correspond to any of them.
-        struct
-        {
-            uvm_processor_id_t resident_processors[16];
-            uvm_gpu_phys_address_t phys_addresses[16];
-            uvm_access_counter_buffer_entry_t phys_entry;
-        } scratch;
    } virt;

    struct
@ -464,8 +455,8 @@ struct uvm_access_counter_service_batch_context_struct
        NvU32                              num_notifications;

        // Boolean used to avoid sorting the fault batch by aperture if we
-        // determine at fetch time that all the access counter notifications in the
-        // batch report the same aperture
+        // determine at fetch time that all the access counter notifications in
+        // the batch report the same aperture
        bool                              is_single_aperture;
    } phys;

@ -661,8 +652,8 @@ struct uvm_gpu_struct
    struct
    {
        // Big page size used by the internal UVM VA space
-        // Notably it may be different than the big page size used by a user's VA
-        // space in general.
+        // Notably it may be different than the big page size used by a user's
+        // VA space in general.
        NvU32 internal_size;
    } big_page;

@ -688,8 +679,8 @@ struct uvm_gpu_struct
        // lazily-populated array of peer GPUs, indexed by the peer's GPU index
        uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

-        // Leaf spinlock used to synchronize access to the peer_gpus table so that
-        // it can be safely accessed from the access counters bottom half
+        // Leaf spinlock used to synchronize access to the peer_gpus table so
+        // that it can be safely accessed from the access counters bottom half
        uvm_spinlock_t peer_gpus_lock;
    } peer_info;

@ -980,6 +971,10 @@ struct uvm_parent_gpu_struct

    bool plc_supported;

+    // If true, page_tree initialization pre-populates no_ats_ranges. It only
+    // affects ATS systems.
+    bool no_ats_range_required;
+
    // Parameters used by the TLB batching API
    struct
    {
@ -1051,14 +1046,16 @@ struct uvm_parent_gpu_struct
    // Interrupt handling state and locks
    uvm_isr_info_t isr;

-    // Fault buffer info. This is only valid if supports_replayable_faults is set to true
+    // Fault buffer info. This is only valid if supports_replayable_faults is
+    // set to true.
    uvm_fault_buffer_info_t fault_buffer_info;

    // PMM lazy free processing queue.
    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
    nv_kthread_q_t lazy_free_q;

-    // Access counter buffer info. This is only valid if supports_access_counters is set to true
+    // Access counter buffer info. This is only valid if
+    // supports_access_counters is set to true.
    uvm_access_counter_buffer_info_t access_counter_buffer_info;

    // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@ -1108,7 +1105,7 @@ struct uvm_parent_gpu_struct
    uvm_rb_tree_t instance_ptr_table;
    uvm_spinlock_t instance_ptr_table_lock;

-    // This is set to true if the GPU belongs to an SLI group. Else, set to false.
+    // This is set to true if the GPU belongs to an SLI group.
    bool sli_enabled;

    struct
@ -1135,8 +1132,8 @@ struct uvm_parent_gpu_struct
    // environment, rather than using the peer-id field of the PTE (which can
    // only address 8 gpus), all gpus are assigned a 47-bit physical address
    // space by the fabric manager. Any physical address access to these
-    // physical address spaces are routed through the switch to the corresponding
-    // peer.
+    // physical address spaces are routed through the switch to the
+    // corresponding peer.
    struct
    {
        bool is_nvswitch_connected;
@ -1162,6 +1159,16 @@ struct uvm_parent_gpu_struct
        NvU64 memory_window_start;
        NvU64 memory_window_end;
    } system_bus;
+
+    // WAR to issue ATS TLB invalidation commands ourselves.
+    struct
+    {
+        uvm_mutex_t smmu_lock;
+        struct page *smmu_cmdq;
+        void __iomem *smmu_cmdqv_base;
+        unsigned long smmu_prod;
+        unsigned long smmu_cons;
+    } smmu_war;
 };

 static const char *uvm_gpu_name(uvm_gpu_t *gpu)
@ -1351,7 +1358,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
 // They must not be the same gpu.
 uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

-// Get the processor id accessible by the given GPU for the given physical address
+// Get the processor id accessible by the given GPU for the given physical
+// address.
 uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

 // Get the P2P capabilities between the gpus with the given indexes
@ -1448,9 +1456,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);

 // Check for ECC errors without calling into RM
 //
-// Calling into RM is problematic in many places, this check is always safe to do.
-// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
-// it's required to call uvm_gpu_check_ecc_error() to be sure.
+// Calling into RM is problematic in many places, this check is always safe to
+// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
+// and it's required to call uvm_gpu_check_ecc_error() to be sure.
 NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);

 // Map size bytes of contiguous sysmem on the GPU for physical access
@ -1507,6 +1515,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
 // The GPU must be initialized before calling this function.
 bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

+bool uvm_platform_uses_canonical_form_address(void);
+
 // Returns addr's canonical form for host systems that use canonical form
 // addresses.
 NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@ -1553,8 +1563,9 @@ uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
 // Debug print of GPU properties
 void uvm_gpu_print(uvm_gpu_t *gpu);

-// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
-// half GPU page fault handler uses this to look up the VA space for GPU faults.
+// Add the given instance pointer -> user_channel mapping to this GPU. The
+// bottom half GPU page fault handler uses this to look up the VA space for GPU
+// faults.
 NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
 void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);

--- a/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_access_counters.c
@ -33,17 +33,17 @@
 #include "uvm_va_space_mm.h"
 #include "uvm_pmm_sysmem.h"
 #include "uvm_perf_module.h"
+#include "uvm_ats_ibm.h"

 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN     1
 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
-#define UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT "2m"
+#define UVM_PERF_ACCESS_COUNTER_GRANULARITY         UVM_ACCESS_COUNTER_GRANULARITY_2M
 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN       1
 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX       ((1 << 16) - 1)
 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT   256

-#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
-#define UVM_ACCESS_COUNTER_ACTION_CLEAR  0x2
-#define UVM_ACCESS_COUNTER_ON_MANAGED    0x4
+#define UVM_ACCESS_COUNTER_ACTION_CLEAR     0x1
+#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED  0x2

 // Each page in a tracked physical range may belong to a different VA Block. We
 // preallocate an array of reverse map translations. However, access counter
@ -54,12 +54,6 @@
 #define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
 #define UVM_SUB_GRANULARITY_REGIONS 32

-// The GPU offers the following tracking granularities: 64K, 2M, 16M, 16G
-//
-// Use the largest granularity to minimize the number of access counter
-// notifications. This is fine because we simply drop the notifications during
-// normal operation, and tests override these values.
-static UVM_ACCESS_COUNTER_GRANULARITY g_uvm_access_counter_granularity;
 static unsigned g_uvm_access_counter_threshold;

 // Per-VA space access counters information
@ -87,7 +81,6 @@ static int uvm_perf_access_counter_momc_migration_enable = -1;
 static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;

 // See module param documentation below
-static char *uvm_perf_access_counter_granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT;
 static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;

 // Module parameters for the tunables
@ -100,10 +93,6 @@ MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
                 "Whether MOMC access counters will trigger migrations."
                 "Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
 module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
-module_param(uvm_perf_access_counter_granularity, charp, S_IRUGO);
-MODULE_PARM_DESC(uvm_perf_access_counter_granularity,
-                 "Size of the physical memory region tracked by each counter. Valid values as"
-                 "of Volta: 64k, 2m, 16m, 16g");
 module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
 MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
                 "Number of remote accesses on a region required to trigger a notification."
@ -136,7 +125,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va

 // Whether access counter migrations are enabled or not. The policy is as
 // follows:
-// - MIMC migrations are enabled by default on P9 systems with ATS support
+// - MIMC migrations are disabled by default on all systems except P9.
 // - MOMC migrations are disabled by default on all systems
 // - Users can override this policy by specifying on/off
 static bool is_migration_enabled(uvm_access_counter_type_t type)
@ -159,7 +148,10 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
    if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
        return false;

-    return g_uvm_global.ats.supported;
+    if (UVM_ATS_IBM_SUPPORTED())
+        return g_uvm_global.ats.supported;
+
+    return false;
 }

 // Create the access counters tracking struct for the given VA space
@ -225,30 +217,18 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
    return NV_OK;
 }

-// Clear the given access counter and add it to the per-GPU clear tracker
-static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
-                                               const uvm_access_counter_buffer_entry_t *entry)
+// Clear the access counter notifications and add it to the per-GPU clear
+// tracker.
+static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
+                                                    uvm_access_counter_buffer_entry_t **notification_start,
+                                                    NvU32 num_notifications)
 {
+    NvU32 i;
    NV_STATUS status;
    uvm_push_t push;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;

-    if (entry->address.is_virtual) {
-        status = uvm_push_begin(gpu->channel_manager,
-                                UVM_CHANNEL_TYPE_MEMOPS,
-                                &push,
-                                "Clear access counter with virtual address: 0x%llx",
-                                entry->address.address);
-    }
-    else {
-        status = uvm_push_begin(gpu->channel_manager,
-                                UVM_CHANNEL_TYPE_MEMOPS,
-                                &push,
-                                "Clear access counter with physical address: 0x%llx:%s",
-                                entry->address.address,
-                                uvm_aperture_string(entry->address.aperture));
-    }
-
+    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
                      nvstatusToString(status),
@ -256,7 +236,8 @@ static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
        return status;
    }

-    gpu->parent->host_hal->access_counter_clear_targeted(&push, entry);
+    for (i = 0; i < num_notifications; i++)
+        gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);

    uvm_push_end(&push);

@ -381,25 +362,6 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
        g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
    }

-    if (strcmp(uvm_perf_access_counter_granularity, "64k") == 0) {
-        g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_64K;
-    }
-    else if (strcmp(uvm_perf_access_counter_granularity, "2m") == 0) {
-        g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
-    }
-    else if (strcmp(uvm_perf_access_counter_granularity, "16m") == 0) {
-        g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16M;
-    }
-    else if (strcmp(uvm_perf_access_counter_granularity, "16g") == 0) {
-        g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
-    }
-    else {
-        g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
-        pr_info("Invalid value '%s' for uvm_perf_access_counter_granularity, using '%s' instead",
-                uvm_perf_access_counter_granularity,
-                UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT);
-    }
-
    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);

@ -422,7 +384,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
    UVM_ASSERT(access_counters->rm_info.bufferSize %
               parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);

-    status = config_granularity_to_bytes(g_uvm_access_counter_granularity, &granularity_bytes);
+    status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
    UVM_ASSERT(status == NV_OK);
    if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
        UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
@ -641,8 +603,8 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
    else {
        UvmGpuAccessCntrConfig default_config =
        {
-            .mimcGranularity = g_uvm_access_counter_granularity,
-            .momcGranularity = g_uvm_access_counter_granularity,
+            .mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
+            .momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
            .mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
            .momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
            .threshold = g_uvm_access_counter_threshold,
@ -767,6 +729,22 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
    return cmp_access_counter_instance_ptr(a, b);
 }

+// Sort comparator for pointers to GVA access counter notification buffer
+// entries that sorts by va_space, and fault address.
+static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
+{
+    const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
+    const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
+
+    int result;
+
+    result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
+    if (result != 0)
+        return result;
+
+    return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
+}
+
 // Sort comparator for pointers to GPA access counter notification buffer
 // entries that sorts by physical address' aperture
 static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
@ -924,12 +902,11 @@ static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,

 // GVA notifications provide an instance_ptr and ve_id that can be directly
 // translated to a VA space. In order to minimize translations, we sort the
-// entries by instance_ptr.
+// entries by instance_ptr, va_space and notification address in that order.
 static void preprocess_virt_notifications(uvm_gpu_t *gpu,
                                          uvm_access_counter_service_batch_context_t *batch_context)
 {
    if (!batch_context->virt.is_single_instance_ptr) {
-        // Sort by instance_ptr
        sort(batch_context->virt.notifications,
             batch_context->virt.num_notifications,
             sizeof(*batch_context->virt.notifications),
@ -938,6 +915,12 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
    }

    translate_virt_notifications_instance_ptrs(gpu, batch_context);
+
+    sort(batch_context->virt.notifications,
+         batch_context->virt.num_notifications,
+         sizeof(*batch_context->virt.notifications),
+         cmp_sort_virt_notifications_by_va_space_address,
+         NULL);
 }

 // GPA notifications provide a physical address and an aperture. Sort
@ -946,7 +929,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
 static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
 {
    if (!batch_context->phys.is_single_aperture) {
-        // Sort by instance_ptr
        sort(batch_context->phys.notifications,
             batch_context->phys.num_notifications,
             sizeof(*batch_context->phys.notifications),
@ -955,6 +937,28 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
    }
 }

+static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
+                                                uvm_access_counter_buffer_entry_t **notification_start,
+                                                NvU32 num_entries,
+                                                NvU32 flags)
+{
+    NV_STATUS status = NV_OK;
+
+    if (uvm_enable_builtin_tests) {
+        // TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
+        //                    to va_space instead of broadcasting.
+        NvU32 i;
+
+        for (i = 0; i < num_entries; i++)
+            uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
+    }
+
+    if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
+        status = access_counter_clear_notifications(gpu, notification_start, num_entries);
+
+    return status;
+}
+
 static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
                                         uvm_va_block_t *va_block,
                                         uvm_va_block_retry_t *va_block_retry,
@ -1163,7 +1167,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
                                              const uvm_access_counter_buffer_entry_t *current_entry,
                                              const uvm_reverse_map_t *reverse_mappings,
                                              size_t num_reverse_mappings,
-                                              unsigned *out_flags)
+                                              NvU32 *out_flags)
 {
    size_t index;
    uvm_va_block_t *va_block = reverse_mappings[0].va_block;
@ -1190,7 +1194,6 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
        // If an mm is registered with the VA space, we have to retain it
        // in order to lock it before locking the VA space.
        mm = uvm_va_space_mm_retain_lock(va_space);
-
        uvm_va_space_down_read(va_space);

        // Re-check that the VA block is valid after taking the VA block lock.
@ -1251,7 +1254,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
                                        const uvm_access_counter_buffer_entry_t *current_entry,
                                        const uvm_reverse_map_t *reverse_mappings,
                                        size_t num_reverse_mappings,
-                                        unsigned *out_flags)
+                                        NvU32 *out_flags)
 {
    NV_STATUS status = NV_OK;
    size_t index;
@ -1259,7 +1262,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
    *out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

    for (index = 0; index < num_reverse_mappings; ++index) {
-        unsigned out_flags_local = 0;
+        NvU32 out_flags_local = 0;
        status = service_phys_single_va_block(gpu,
                                              batch_context,
                                              current_entry,
@ -1318,7 +1321,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
                                                       NvU64 address,
                                                       unsigned long sub_granularity,
                                                       size_t *num_reverse_mappings,
-                                                       unsigned *out_flags)
+                                                       NvU32 *out_flags)
 {
    NV_STATUS status;
    NvU32 region_start, region_end;
@ -1327,7 +1330,10 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,

    // Get the reverse_map translations for all the regions set in the
    // sub_granularity field of the counter.
-    for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
+    for_each_sub_granularity_region(region_start,
+                                    region_end,
+                                    sub_granularity,
+                                    config->sub_granularity_regions_per_translation) {
        NvU64 local_address = address + region_start * config->sub_granularity_region_size;
        NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
        uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
@ -1376,7 +1382,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
 static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
                                           uvm_access_counter_service_batch_context_t *batch_context,
                                           const uvm_access_counter_buffer_entry_t *current_entry,
-                                           unsigned *out_flags)
+                                           NvU32 *out_flags)
 {
    NvU64 address;
    NvU64 translation_index;
@ -1387,7 +1393,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
    size_t total_reverse_mappings = 0;
    uvm_gpu_t *resident_gpu = NULL;
    NV_STATUS status = NV_OK;
-    unsigned flags = 0;
+    NvU32 flags = 0;

    address = current_entry->address.address;
    UVM_ASSERT(address % config->translation_size == 0);
@ -1415,7 +1421,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,

    for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
        size_t num_reverse_mappings;
-        unsigned out_flags_local = 0;
+        NvU32 out_flags_local = 0;
        status = service_phys_notification_translation(gpu,
                                                       resident_gpu,
                                                       batch_context,
@ -1437,11 +1443,8 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
        sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
    }

-    // Currently we only report events for our tests, not for tools
-    if (uvm_enable_builtin_tests) {
-        *out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
-        *out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
-    }
+    if (uvm_enable_builtin_tests)
+        *out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);

    if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
        *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
@ -1454,22 +1457,21 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
 {
    NvU32 i;
+    uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
+
    preprocess_phys_notifications(batch_context);

    for (i = 0; i < batch_context->phys.num_notifications; ++i) {
        NV_STATUS status;
-        uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
-        unsigned flags = 0;
+        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
+        NvU32 flags = 0;

        if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
            continue;

        status = service_phys_notification(gpu, batch_context, current_entry, &flags);
-        if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
-            uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);

-        if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
-            status = access_counter_clear_targeted(gpu, current_entry);
+        notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);

        if (status != NV_OK)
            return status;
@ -1478,152 +1480,218 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
    return NV_OK;
 }

-static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
+static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
+                                                      uvm_va_block_t *va_block,
+                                                      uvm_processor_id_t processor,
+                                                      uvm_access_counter_service_batch_context_t *batch_context)
 {
-    return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
-                                 *(uvm_gpu_phys_address_t*)_b);
-}
+    uvm_va_block_retry_t va_block_retry;
+    uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
+    uvm_service_block_context_t *service_context = &batch_context->block_service_context;

-static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
-{
-    if (a.aperture != b.aperture)
-        return false;
-
-    UVM_ASSERT(is_power_of_2(granularity));
-
-    return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
-}
-
-static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
-                                                NvU64 region_size,
-                                                NvU64 sub_region_size,
-                                                NvU32 accessed_mask)
-{
-    const unsigned accessed_index = (address.address % region_size) / sub_region_size;
-
-    // accessed_mask is only filled for tracking granularities larger than 64K
-    if (region_size == UVM_PAGE_SIZE_64K)
-        return true;
-
-    UVM_ASSERT(accessed_index < 32);
-    return ((1 << accessed_index) & accessed_mask) != 0;
-}
-
-static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
-                                           uvm_access_counter_service_batch_context_t *batch_context,
-                                           const uvm_access_counter_buffer_entry_t *current_entry,
-                                           unsigned *out_flags)
-{
-    NV_STATUS status = NV_OK;
-    NvU64 notification_size;
-    NvU64 address;
-    uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
-    uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
-    int num_addresses = 0;
-    int i;
-
-    // Virtual address notifications are always 64K aligned
-    NvU64 region_start = current_entry->address.address;
-    NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
-    
-
-    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
-    uvm_access_counter_type_t counter_type = current_entry->counter_type;
-
-    const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
-
-    uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
-
-    UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
-
-    // Entries with NULL va_space are simply dropped.
-    if (!va_space)
+    if (uvm_page_mask_empty(accessed_pages))
        return NV_OK;

-    status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
-    if (status != NV_OK)
-        return status;
+    uvm_assert_mutex_locked(&va_block->lock);

-    // Collect physical locations that could have been touched
-    // in the reported 64K VA region. The notification mask can
-    // correspond to any of them.
-    uvm_va_space_down_read(va_space);
-    for (address = region_start; address < region_end;) {
-        uvm_va_block_t *va_block;
+    service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
+    service_context->num_retries = 0;
+    service_context->block_context.mm = mm;

-        NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
-        if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
-            address += PAGE_SIZE;
-            continue;
-        }
+    return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
+                                     &va_block_retry,
+                                     service_va_block_locked(processor,
+                                                             va_block,
+                                                             &va_block_retry,
+                                                             service_context,
+                                                             accessed_pages));
+}

-        uvm_mutex_lock(&va_block->lock);
-        while (address < va_block->end && address < region_end) {
-            const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
+static void expand_notification_block(struct mm_struct *mm,
+                                      uvm_gpu_va_space_t *gpu_va_space,
+                                      uvm_va_block_t *va_block,
+                                      uvm_page_mask_t *accessed_pages,
+                                      const uvm_access_counter_buffer_entry_t *current_entry)
+{
+    NvU64 addr;
+    NvU64 granularity = 0;
+    uvm_gpu_t *resident_gpu = NULL;
+    uvm_processor_id_t resident_id;
+    uvm_page_index_t page_index;
+    uvm_gpu_t *gpu = gpu_va_space->gpu;
+    const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
+    const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
+                                                                             UVM_ACCESS_COUNTER_TYPE_MIMC);

-            // UVM va_block always maps the closest resident location to processor
-            const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
+    config_granularity_to_bytes(config->rm.granularity, &granularity);

-            // Add physical location if it's valid and not local vidmem
-            if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
-                uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
-                if (phys_address_in_accessed_sub_region(phys_address,
-                                                        notification_size,
-                                                        config->sub_granularity_region_size,
-                                                        current_entry->sub_granularity)) {
-                    resident_processors[num_addresses] = res_id;
-                    phys_addresses[num_addresses] = phys_address;
-                    ++num_addresses;
-                }
-                else {
-                    UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
-                                     phys_address.address,
-                                     uvm_aperture_string(phys_address.aperture),
-                                     current_entry->sub_granularity);
-                }
-            }
+    // Granularities other than 2MB can only be enabled by UVM tests. Do nothing
+    // in that case.
+    if (granularity != UVM_PAGE_SIZE_2M)
+        return;

-            address += PAGE_SIZE;
-        }
-        uvm_mutex_unlock(&va_block->lock);
+    addr = current_entry->address.address;
+
+    uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
+    uvm_assert_mutex_locked(&va_block->lock);
+
+    page_index = uvm_va_block_cpu_page_index(va_block, addr);
+
+    resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
+
+    // resident_id might be invalid or might already be the same as the GPU
+    // which received the notification if the memory was already migrated before
+    // acquiring the locks either during the servicing of previous notifications
+    // or during faults or because of explicit migrations or if the VA range was
+    // freed after receving the notification. Return NV_OK in such cases.
+    if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
+        return;
+
+    if (UVM_ID_IS_GPU(resident_id))
+        resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
+
+    if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
+        uvm_page_mask_set(accessed_pages, page_index);
    }
-    uvm_va_space_up_read(va_space);
+    else {
+        NvU32 region_start;
+        NvU32 region_end;
+        unsigned long sub_granularity = current_entry->sub_granularity;
+        NvU32 num_regions = config->sub_granularity_regions_per_translation;
+        NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
+        uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id);

-    // The addresses need to be sorted to aid coalescing.
-    sort(phys_addresses,
-         num_addresses,
-         sizeof(*phys_addresses),
-         cmp_sort_gpu_phys_addr,
-         NULL);
+        UVM_ASSERT(num_sub_pages >= 1);

-    for (i = 0; i < num_addresses; ++i) {
-        uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
-
-        // Skip the current pointer if the physical region was already handled
-        if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
-            UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
-            continue;
+        // region_start and region_end refer to sub_granularity indices, not
+        // page_indices.
+        for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
+            uvm_page_mask_region_fill(accessed_pages,
+                                      uvm_va_block_region(region_start * num_sub_pages,
+                                                          region_end * num_sub_pages));
        }
-        UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
-                         i,
-                         num_addresses,
-                         phys_addresses[i].address,
-                         notification_size - 1,
-                         uvm_aperture_string(phys_addresses[i].aperture),
-                         uvm_gpu_name(gpu));

-        // Construct a fake phys addr AC entry
-        fake_entry->counter_type = current_entry->counter_type;
-        fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
-        fake_entry->address.aperture = phys_addresses[i].aperture;
-        fake_entry->address.is_virtual = false;
-        fake_entry->physical_info.resident_id = resident_processors[i];
-        fake_entry->counter_value = current_entry->counter_value;
-        fake_entry->sub_granularity = current_entry->sub_granularity;
+        // Remove pages in the va_block which are not resident on resident_id.
+        // If the GPU is heavily accessing those pages, future access counter
+        // migrations will migrate them to the GPU.
+        uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
+    }
+}

-        status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
-        if (status != NV_OK)
+static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
+                                                     uvm_gpu_va_space_t *gpu_va_space,
+                                                     uvm_va_block_t *va_block,
+                                                     uvm_access_counter_service_batch_context_t *batch_context,
+                                                     NvU32 index,
+                                                     NvU32 *out_index)
+{
+    NvU32 i = index;
+    NvU32 flags = 0;
+    NV_STATUS status = NV_OK;
+    NV_STATUS flags_status;
+    uvm_gpu_t *gpu = gpu_va_space->gpu;
+    uvm_va_space_t *va_space = gpu_va_space->va_space;
+    uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
+    uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
+
+    UVM_ASSERT(va_block);
+    UVM_ASSERT(i < batch_context->virt.num_notifications);
+
+    uvm_assert_rwsem_locked(&va_space->lock);
+
+    uvm_page_mask_zero(accessed_pages);
+
+    uvm_mutex_lock(&va_block->lock);
+
+    while (i < batch_context->virt.num_notifications) {
+        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
+        NvU64 address = current_entry->address.address;
+
+        if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
+            *out_index = i;
            break;
+        }
+
+        expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
+
+        i++;
+        *out_index = i;
+    }
+
+    status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
+
+    uvm_mutex_unlock(&va_block->lock);
+
+    // Atleast one notification should have been processed.
+    UVM_ASSERT(index < *out_index);
+
+    if (status == NV_OK)
+        flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
+
+    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
+
+    if ((status == NV_OK) && (flags_status != NV_OK))
+        status = flags_status;
+
+    return status;
+}
+
+static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
+                                                  uvm_gpu_va_space_t *gpu_va_space,
+                                                  uvm_access_counter_service_batch_context_t *batch_context,
+                                                  NvU32 index,
+                                                  NvU32 *out_index)
+{
+    NV_STATUS status;
+    uvm_va_block_t *va_block;
+    uvm_va_space_t *va_space = gpu_va_space->va_space;
+    uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
+    NvU64 address = current_entry->address.address;
+
+    UVM_ASSERT(va_space);
+
+    uvm_assert_rwsem_locked(&va_space->lock);
+
+    // Virtual address notifications are always 64K aligned
+    UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
+
+    // TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
+    //                    migrations for virtual notifications on configs with
+    //                    4KB page size
+    status = uvm_va_block_find(va_space, address, &va_block);
+    if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
+
+        UVM_ASSERT(va_block);
+
+        status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
+    }
+    else {
+        NvU32 flags = 0;
+
+        UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
+                   (status == NV_ERR_INVALID_ADDRESS)  ||
+                   uvm_va_block_is_hmm(va_block));
+
+        // NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
+        // VA block has been allocated yet. This can happen if there are stale
+        // notifications in the batch. A new VA range may have been allocated in
+        // that range. So, clear the notification entry to continue getting
+        // notifications for the new VA range.
+        if (status == NV_ERR_OBJECT_NOT_FOUND)
+            flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
+
+        // NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
+        // doesn't exist or it's not a managed range. Access counter migrations
+        // are not currently supported on such ranges.
+        //
+        // TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
+        // When support for SAM migrations is addded, clear the notification
+        // entry if the VA range doesn't exist in order to receive notifications
+        // when a new VA range is allocated in that region.
+        status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
+        *out_index = index + 1;
+
+        status = NV_OK;
    }

    return status;
@ -1632,33 +1700,67 @@ static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
 static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
 {
-    NvU32 i;
+    NvU32 i = 0;
    NV_STATUS status = NV_OK;
+    struct mm_struct *mm = NULL;
+    uvm_va_space_t *va_space = NULL;
+    uvm_va_space_t *prev_va_space = NULL;
+    uvm_gpu_va_space_t *gpu_va_space = NULL;
+
+    // TODO: Bug 4299018 : Add support for virtual access counter migrations on
+    //                     4K page sizes.
+    if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
+        return notify_tools_and_process_flags(gpu,
+                                              batch_context->virt.notifications,
+                                              batch_context->virt.num_notifications,
+                                              0);
+    }
+
    preprocess_virt_notifications(gpu, batch_context);

-    for (i = 0; i < batch_context->virt.num_notifications; ++i) {
-        unsigned flags = 0;
+    while (i < batch_context->virt.num_notifications) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
+        va_space = current_entry->virtual_info.va_space;

-        status = service_virt_notification(gpu, batch_context, current_entry, &flags);
+        if (va_space != prev_va_space) {

-        UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
-                         i + 1,
-                         batch_context->virt.num_notifications,
-                         (flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
-                         status,
-                         (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
+            // New va_space detected, drop locks of the old va_space.
+            if (prev_va_space) {
+                uvm_va_space_up_read(prev_va_space);
+                uvm_va_space_mm_release_unlock(prev_va_space, mm);

-        if (uvm_enable_builtin_tests)
-            uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
+                mm = NULL;
+                gpu_va_space = NULL;
+            }

-        if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
-            status = access_counter_clear_targeted(gpu, current_entry);
+            // Acquire locks for the new va_space.
+            if (va_space) {
+                mm = uvm_va_space_mm_retain_lock(va_space);
+                uvm_va_space_down_read(va_space);
+
+                gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
+            }
+
+            prev_va_space = va_space;
+        }
+
+        if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
+            status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
+        }
+        else {
+            status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
+            i++;
+        }

        if (status != NV_OK)
            break;
    }

+    if (va_space) {
+        uvm_va_space_up_read(va_space);
+        uvm_va_space_mm_release_unlock(va_space, mm);
+    }
+
    return status;
 }

@ -1941,6 +2043,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
    }
    else {
        uvm_access_counter_buffer_entry_t entry = { 0 };
+        uvm_access_counter_buffer_entry_t *notification = &entry;

        if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
            entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
@ -1950,7 +2053,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
        entry.bank = params->bank;
        entry.tag = params->tag;

-        status = access_counter_clear_targeted(gpu, &entry);
+        status = access_counter_clear_notifications(gpu, &notification, 1);
    }

    if (status == NV_OK)
--- a/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_non_replayable_faults.c
@ -235,17 +235,27 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
    return NV_OK;
 }

-// In SRIOV, the UVM (guest) driver does not have access to the privileged
-// registers used to clear the faulted bit. Instead, UVM requests host RM to do
-// the clearing on its behalf, using a SW method.
 static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
 {
-    if (uvm_gpu_is_virt_mode_sriov(gpu)) {
-        UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
-        return true;
-    }
+    // If true, UVM uses a SW method to request RM to do the clearing on its
+    // behalf.
+    bool use_sw_method = false;

-    return false;
+    // In SRIOV, the UVM (guest) driver does not have access to the privileged
+    // registers used to clear the faulted bit.
+    if (uvm_gpu_is_virt_mode_sriov(gpu))
+        use_sw_method = true;
+
+    // In Confidential Computing access to the privileged registers is blocked,
+    // in order to prevent interference between guests, or between the
+    // (untrusted) host and the guests.
+    if (g_uvm_global.conf_computing_enabled)
+        use_sw_method = true;
+
+    if (use_sw_method)
+        UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
+
+    return use_sw_method;
 }

 static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
@ -570,7 +580,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,

        ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;

-        ats_invalidate->write_faults_in_batch = false;
+        ats_invalidate->tlb_batch_pending = false;

        va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);

--- a/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
+++ b/kernel-open/nvidia-uvm/uvm_gpu_replayable_faults.c
@ -1180,7 +1180,11 @@ static void mark_fault_fatal(uvm_fault_service_batch_context_t *batch_context,
    fault_entry->replayable.cancel_va_mode = cancel_va_mode;

    utlb->has_fatal_faults = true;
-    batch_context->has_fatal_faults = true;
+
+    if (!batch_context->fatal_va_space) {
+        UVM_ASSERT(fault_entry->va_space);
+        batch_context->fatal_va_space = fault_entry->va_space;
+    }
 }

 static void fault_entry_duplicate_flags(uvm_fault_service_batch_context_t *batch_context,
@ -1511,7 +1515,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,

    ++block_context->num_retries;

-    if (status == NV_OK && batch_context->has_fatal_faults)
+    if (status == NV_OK && batch_context->fatal_va_space)
        status = uvm_va_block_set_cancel(va_block, &block_context->block_context, gpu);

    return status;
@ -1937,14 +1941,198 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
    return status;
 }

+// Called when a fault in the batch has been marked fatal. Flush the buffer
+// under the VA and mmap locks to remove any potential stale fatal faults, then
+// service all new faults for just that VA space and cancel those which are
+// fatal. Faults in other VA spaces are replayed when done and will be processed
+// when normal fault servicing resumes.
+static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
+{
+    NV_STATUS status = NV_OK;
+    NvU32 i;
+    uvm_va_space_t *va_space = batch_context->fatal_va_space;
+    uvm_gpu_va_space_t *gpu_va_space = NULL;
+    struct mm_struct *mm;
+    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
+    uvm_va_block_context_t *va_block_context = &service_context->block_context;
+
+    UVM_ASSERT(gpu->parent->replayable_faults_supported);
+    UVM_ASSERT(va_space);
+
+    // Perform the flush and re-fetch while holding the mmap_lock and the
+    // VA space lock. This avoids stale faults because it prevents any vma
+    // modifications (mmap, munmap, mprotect) from happening between the time HW
+    // takes the fault and we cancel it.
+    mm = uvm_va_space_mm_retain_lock(va_space);
+    va_block_context->mm = mm;
+    uvm_va_space_down_read(va_space);
+
+    // We saw fatal faults in this VA space before. Flush while holding
+    // mmap_lock to make sure those faults come back (aren't stale).
+    //
+    // We need to wait until all old fault messages have arrived before
+    // flushing, hence UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT.
+    status = fault_buffer_flush_locked(gpu,
+                                       UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
+                                       UVM_FAULT_REPLAY_TYPE_START,
+                                       batch_context);
+    if (status != NV_OK)
+        goto done;
+
+    // Wait for the flush's replay to finish to give the legitimate faults a
+    // chance to show up in the buffer again.
+    status = uvm_tracker_wait(&replayable_faults->replay_tracker);
+    if (status != NV_OK)
+        goto done;
+
+    // We expect all replayed faults to have arrived in the buffer so we can re-
+    // service them. The replay-and-wait sequence above will ensure they're all
+    // in the HW buffer. When GSP owns the HW buffer, we also have to wait for
+    // GSP to copy all available faults from the HW buffer into the shadow
+    // buffer.
+    //
+    // TODO: Bug 2533557: This flush does not actually guarantee that GSP will
+    //       copy over all faults.
+    status = hw_fault_buffer_flush_locked(gpu->parent);
+    if (status != NV_OK)
+        goto done;
+
+    // If there is no GPU VA space for the GPU, ignore all faults in the VA
+    // space. This can happen if the GPU VA space has been destroyed since we
+    // unlocked the VA space in service_fault_batch. That means the fatal faults
+    // are stale, because unregistering the GPU VA space requires preempting the
+    // context and detaching all channels in that VA space. Restart fault
+    // servicing from the top.
+    gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
+    if (!gpu_va_space)
+        goto done;
+
+    // Re-parse the new faults
+    batch_context->num_invalid_prefetch_faults = 0;
+    batch_context->num_duplicate_faults        = 0;
+    batch_context->num_replays                 = 0;
+    batch_context->fatal_va_space              = NULL;
+    batch_context->has_throttled_faults        = false;
+
+    status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
+    if (status != NV_OK)
+        goto done;
+
+    // No more faults left. Either the previously-seen fatal entry was stale, or
+    // RM killed the context underneath us.
+    if (batch_context->num_cached_faults == 0)
+        goto done;
+
+    ++batch_context->batch_id;
+
+    status = preprocess_fault_batch(gpu, batch_context);
+    if (status != NV_OK) {
+        if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
+            // Another flush happened due to stale faults or a context-fatal
+            // error. The previously-seen fatal fault might not exist anymore,
+            // so restart fault servicing from the top.
+            status = NV_OK;
+        }
+
+        goto done;
+    }
+
+    // Search for the target VA space
+    for (i = 0; i < batch_context->num_coalesced_faults; i++) {
+        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
+        UVM_ASSERT(current_entry->va_space);
+        if (current_entry->va_space == va_space)
+            break;
+    }
+
+    while (i < batch_context->num_coalesced_faults) {
+        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
+
+        if (current_entry->va_space != va_space)
+            break;
+
+        // service_fault_batch_dispatch() doesn't expect unserviceable faults.
+        // Just cancel them directly.
+        if (current_entry->is_fatal) {
+            status = cancel_fault_precise_va(gpu, current_entry, UVM_FAULT_CANCEL_VA_MODE_ALL);
+            if (status != NV_OK)
+                break;
+
+            ++i;
+        }
+        else {
+            uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
+            NvU32 block_faults;
+
+            ats_invalidate->tlb_batch_pending = false;
+            uvm_hmm_service_context_init(service_context);
+
+            // Service all the faults that we can. We only really need to search
+            // for fatal faults, but attempting to service all is the easiest
+            // way to do that.
+            status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
+            if (status != NV_OK) {
+                // TODO: Bug 3900733: clean up locking in service_fault_batch().
+                // We need to drop lock and retry. That means flushing and
+                // starting over.
+                if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
+                    status = NV_OK;
+
+                break;
+            }
+
+            // Invalidate TLBs before cancel to ensure that fatal faults don't
+            // get stuck in HW behind non-fatal faults to the same line.
+            status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
+            if (status != NV_OK)
+                break;
+
+            while (block_faults-- > 0) {
+                current_entry = batch_context->ordered_fault_cache[i];
+                if (current_entry->is_fatal) {
+                    status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
+                    if (status != NV_OK)
+                        break;
+                }
+
+                ++i;
+            }
+        }
+    }
+
+done:
+    uvm_va_space_up_read(va_space);
+    uvm_va_space_mm_release_unlock(va_space, mm);
+
+    if (status == NV_OK) {
+        // There are two reasons to flush the fault buffer here.
+        //
+        // 1) Functional. We need to replay both the serviced non-fatal faults
+        //    and the skipped faults in other VA spaces. The former need to be
+        //    restarted and the latter need to be replayed so the normal fault
+        //    service mechanism can fetch and process them.
+        //
+        // 2) Performance. After cancelling the fatal faults, a flush removes
+        //    any potential duplicated fault that may have been added while
+        //    processing the faults in this batch. This flush also avoids doing
+        //    unnecessary processing after the fatal faults have been cancelled,
+        //    so all the rest are unlikely to remain after a replay because the
+        //    context is probably in the process of dying.
+        status = fault_buffer_flush_locked(gpu,
+                                           UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
+                                           UVM_FAULT_REPLAY_TYPE_START,
+                                           batch_context);
+    }
+
+    return status;
+}
 // Scan the ordered view of faults and group them by different va_blocks
 // (managed faults) and service faults for each va_block, in batch.
 // Service non-managed faults one at a time as they are encountered during the
 // scan.
 //
-// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
-// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
-// space
+// Fatal faults are marked for later processing by the caller.
 static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
                                     fault_service_mode_t service_mode,
                                     uvm_fault_service_batch_context_t *batch_context)
@ -1963,7 +2151,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,

    UVM_ASSERT(gpu->parent->replayable_faults_supported);

-    ats_invalidate->write_faults_in_batch = false;
+    ats_invalidate->tlb_batch_pending = false;
    uvm_hmm_service_context_init(service_context);

    for (i = 0; i < batch_context->num_coalesced_faults;) {
@ -1998,38 +2186,25 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
            va_block_context->mm = mm;

            uvm_va_space_down_read(va_space);
-
            gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
-            if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
-                status = fault_buffer_flush_locked(gpu,
-                                                   UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
-                                                   UVM_FAULT_REPLAY_TYPE_START,
-                                                   batch_context);
-                if (status == NV_OK)
-                    status = NV_WARN_MORE_PROCESSING_REQUIRED;
-
-                break;
-            }
-
-            // The case where there is no valid GPU VA space for the GPU in this
-            // VA space is handled next
        }

        // Some faults could be already fatal if they cannot be handled by
        // the UVM driver
        if (current_entry->is_fatal) {
            ++i;
-            batch_context->has_fatal_faults = true;
+            if (!batch_context->fatal_va_space)
+                batch_context->fatal_va_space = va_space;
+
            utlb->has_fatal_faults = true;
            UVM_ASSERT(utlb->num_pending_faults > 0);
            continue;
        }

-        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
+        if (!gpu_va_space) {
            // If there is no GPU VA space for the GPU, ignore the fault. This
            // can happen if a GPU VA space is destroyed without explicitly
-            // freeing all memory ranges (destroying the VA range triggers a
-            // flush of the fault buffer) and there are stale entries in the
+            // freeing all memory ranges and there are stale entries in the
            // buffer that got fixed by the servicing in a previous batch.
            ++i;
            continue;
@ -2057,7 +2232,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
        i += block_faults;

        // Don't issue replays in cancel mode
-        if (replay_per_va_block && !batch_context->has_fatal_faults) {
+        if (replay_per_va_block && !batch_context->fatal_va_space) {
            status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
            if (status != NV_OK)
                goto fail;
@ -2069,8 +2244,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
        }
    }

-    // Only clobber status if invalidate_status != NV_OK, since status may also
-    // contain NV_WARN_MORE_PROCESSING_REQUIRED.
    if (va_space != NULL) {
        NV_STATUS invalidate_status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
        if (invalidate_status != NV_OK)
@ -2278,64 +2451,6 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
    return false;
 }

-// Cancel just the faults flagged as fatal in the given fault service batch
-// context.
-static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
-{
-    NV_STATUS status = NV_OK;
-    NV_STATUS fault_status;
-    uvm_va_space_t *va_space = NULL;
-    NvU32 i;
-
-    UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
-
-    for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
-        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
-
-        UVM_ASSERT(current_entry->va_space);
-
-        if (current_entry->va_space != va_space) {
-            // Fault on a different va_space, drop the lock of the old one...
-            if (va_space != NULL)
-                uvm_va_space_up_read(va_space);
-
-            va_space = current_entry->va_space;
-
-            // ... and take the lock of the new one
-            uvm_va_space_down_read(va_space);
-
-            // We don't need to check whether a buffer flush is required
-            // (due to VA range destruction). Once a fault is flagged as fatal
-            // we need to cancel it, even if its VA range no longer exists.
-        }
-
-        // See the comment for the same check in cancel_faults_all
-        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id))
-            continue;
-
-        if (current_entry->is_fatal) {
-            status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
-            if (status != NV_OK)
-                break;
-        }
-    }
-
-    if (va_space != NULL)
-        uvm_va_space_up_read(va_space);
-
-    // See the comment on flushing in cancel_faults_all
-    fault_status = fault_buffer_flush_locked(gpu,
-                                             UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
-                                             UVM_FAULT_REPLAY_TYPE_START,
-                                             batch_context);
-
-    // We report the first encountered error.
-    if (status == NV_OK)
-        status = fault_status;
-
-    return status;
-}
-
 // Cancel all faults in the given fault service batch context, even those not
 // marked as fatal.
 static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
@ -2344,56 +2459,51 @@ static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
 {
    NV_STATUS status = NV_OK;
    NV_STATUS fault_status;
-    uvm_va_space_t *va_space = NULL;
-    NvU32 i;
+    NvU32 i = 0;

    UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
    UVM_ASSERT(reason != UvmEventFatalReasonInvalid);

-    for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
+    while (i < batch_context->num_coalesced_faults && status == NV_OK) {
        uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
-        uvm_fault_cancel_va_mode_t cancel_va_mode;
+        uvm_va_space_t *va_space = current_entry->va_space;
+        bool skip_va_space;

-        UVM_ASSERT(current_entry->va_space);
+        UVM_ASSERT(va_space);

-        if (current_entry->va_space != va_space) {
-            // Fault on a different va_space, drop the lock of the old one...
-            if (va_space != NULL)
-                uvm_va_space_up_read(va_space);
+        uvm_va_space_down_read(va_space);

-            va_space = current_entry->va_space;
+        // If there is no GPU VA space for the GPU, ignore all faults in
+        // that VA space. This can happen if the GPU VA space has been
+        // destroyed since we unlocked the VA space in service_fault_batch.
+        // Ignoring the fault avoids targetting a PDB that might have been
+        // reused by another process.
+        skip_va_space = !uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);

-            // ... and take the lock of the new one
-            uvm_va_space_down_read(va_space);
+        for (;
+             i < batch_context->num_coalesced_faults && current_entry->va_space == va_space;
+             current_entry = batch_context->ordered_fault_cache[++i]) {
+            uvm_fault_cancel_va_mode_t cancel_va_mode;
+
+            if (skip_va_space)
+                continue;
+
+            if (current_entry->is_fatal) {
+                UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
+                cancel_va_mode = current_entry->replayable.cancel_va_mode;
+            }
+            else {
+                current_entry->fatal_reason = reason;
+                cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
+            }
+
+            status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
+            if (status != NV_OK)
+                break;
        }

-        if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
-            // If there is no GPU VA space for the GPU, ignore the fault.
-            // This can happen if the GPU VA did not exist in
-            // service_fault_batch(), or it was destroyed since then.
-            // This is to avoid targetting a PDB that might have been reused
-            // by another process.
-            continue;
-        }
-
-        // If the fault was already marked fatal, use its reason and cancel
-        // mode. Otherwise use the provided reason.
-        if (current_entry->is_fatal) {
-            UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
-            cancel_va_mode = current_entry->replayable.cancel_va_mode;
-        }
-        else {
-            current_entry->fatal_reason = reason;
-            cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
-        }
-
-        status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
-        if (status != NV_OK)
-            break;
-    }
-
-    if (va_space != NULL)
        uvm_va_space_up_read(va_space);
+    }

    // Because each cancel itself triggers a replay, there may be a large number
    // of new duplicated faults in the buffer after cancelling all the known
@ -2537,7 +2647,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat

        batch_context->num_invalid_prefetch_faults = 0;
        batch_context->num_replays                 = 0;
-        batch_context->has_fatal_faults            = false;
+        batch_context->fatal_va_space              = NULL;
        batch_context->has_throttled_faults        = false;

        // 5) Fetch all faults from buffer
@ -2584,9 +2694,6 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
        // 8) Service all non-fatal faults and mark all non-serviceable faults
        // as fatal
        status = service_fault_batch(gpu, FAULT_SERVICE_MODE_CANCEL, batch_context);
-        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
-            continue;
-
        UVM_ASSERT(batch_context->num_replays == 0);
        if (status == NV_ERR_NO_MEMORY)
            continue;
@ -2594,7 +2701,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
            break;

        // No more fatal faults left, we are done
-        if (!batch_context->has_fatal_faults)
+        if (!batch_context->fatal_va_space)
            break;

        // 9) Search for uTLBs that contain fatal faults and meet the
@ -2616,9 +2723,9 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat

 static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
 {
-    UVM_ASSERT(batch_context->has_fatal_faults);
+    UVM_ASSERT(batch_context->fatal_va_space);
    if (gpu->parent->fault_cancel_va_supported)
-        return cancel_faults_precise_va(gpu, batch_context);
+        return service_fault_batch_for_cancel(gpu, batch_context);

    return cancel_faults_precise_tlb(gpu, batch_context);
 }
@ -2674,7 +2781,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
        batch_context->num_invalid_prefetch_faults = 0;
        batch_context->num_duplicate_faults        = 0;
        batch_context->num_replays                 = 0;
-        batch_context->has_fatal_faults            = false;
+        batch_context->fatal_va_space              = NULL;
        batch_context->has_throttled_faults        = false;

        status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
@ -2702,9 +2809,6 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
        // was flushed
        num_replays += batch_context->num_replays;

-        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
-            continue;
-
        enable_disable_prefetch_faults(gpu->parent, batch_context);

        if (status != NV_OK) {
@ -2718,10 +2822,17 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
            break;
        }

-        if (batch_context->has_fatal_faults) {
+        if (batch_context->fatal_va_space) {
            status = uvm_tracker_wait(&batch_context->tracker);
-            if (status == NV_OK)
+            if (status == NV_OK) {
                status = cancel_faults_precise(gpu, batch_context);
+                if (status == NV_OK) {
+                    // Cancel handling should've issued at least one replay
+                    UVM_ASSERT(batch_context->num_replays > 0);
+                    ++num_batches;
+                    continue;
+                }
+            }

            break;
        }
--- a/kernel-open/nvidia-uvm/uvm_hopper.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper.c
@ -103,5 +103,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->map_remap_larger_page_promotion = false;

    parent_gpu->plc_supported = true;
+
+    parent_gpu->no_ats_range_required = true;
 }

--- a/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_hopper_mmu.c
@ -33,6 +33,7 @@

 #include "uvm_types.h"
 #include "uvm_global.h"
+#include "uvm_common.h"
 #include "uvm_hal.h"
 #include "uvm_hal_types.h"
 #include "uvm_hopper_fault_buffer.h"
@ -42,6 +43,10 @@
 #define MMU_BIG 0
 #define MMU_SMALL 1

+// Used in pde_pcf().
+#define ATS_ALLOWED 0
+#define ATS_NOT_ALLOWED 1
+
 uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
 {
    if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
@ -260,7 +265,108 @@ static NvU64 poisoned_pte_hopper(void)
    return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
 }

-static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
+typedef enum
+{
+    PDE_TYPE_SINGLE,
+    PDE_TYPE_DUAL_BIG,
+    PDE_TYPE_DUAL_SMALL,
+    PDE_TYPE_COUNT,
+} pde_type_t;
+
+static const NvU8 valid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED,
+                                       NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED },
+                                     { NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED,
+                                       NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED },
+                                     { NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED,
+                                       NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED } };
+
+static const NvU8 invalid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED,
+                                         NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED },
+                                       { NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED,
+                                         NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED },
+                                       { NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED,
+                                         NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED } };
+
+static const NvU8 va_base[] = { 56, 47, 38, 29, 21 };
+
+static bool is_ats_range_valid(uvm_page_directory_t *dir, NvU32 child_index)
+{
+    NvU64 pde_base_va;
+    NvU64 min_va_upper;
+    NvU64 max_va_lower;
+    NvU32 index_in_dir;
+
+    uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
+
+    UVM_ASSERT(dir->depth < ARRAY_SIZE(va_base));
+
+    // We can use UVM_PAGE_SIZE_AGNOSTIC because page_size is only used in
+    // index_bits_hopper() for PTE table, i.e., depth 5+, which does not use a
+    // PDE PCF or an ATS_ALLOWED/NOT_ALLOWED setting.
+    UVM_ASSERT(child_index < (1ull << index_bits_hopper(dir->depth, UVM_PAGE_SIZE_AGNOSTIC)));
+
+    pde_base_va = 0;
+    index_in_dir = child_index;
+    while (dir) {
+        pde_base_va += index_in_dir * (1ull << va_base[dir->depth]);
+        index_in_dir = dir->index_in_parent;
+        dir = dir->host_parent;
+    }
+    pde_base_va = (NvU64)((NvS64)(pde_base_va << (64 - num_va_bits_hopper())) >> (64 - num_va_bits_hopper()));
+
+    if (pde_base_va < max_va_lower || pde_base_va >= min_va_upper)
+        return true;
+
+    return false;
+}
+
+// PDE Permission Control Flags
+static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir, NvU32 child_index)
+{
+    const NvU8 (*pcf)[2] = valid ? valid_pcf : invalid_pcf;
+    NvU8 depth = dir->depth;
+
+    UVM_ASSERT(pde_type < PDE_TYPE_COUNT);
+    UVM_ASSERT(depth < 5);
+
+    // On non-ATS systems, PDE PCF only sets the valid and volatile/cache bits.
+    if (!g_uvm_global.ats.enabled)
+        return pcf[pde_type][ATS_ALLOWED];
+
+    // We assume all supported ATS platforms use canonical form address.
+    // See comments in uvm_gpu.c:uvm_gpu_can_address() and in
+    // uvm_mmu.c:page_tree_ats_init();
+    UVM_ASSERT(uvm_platform_uses_canonical_form_address());
+
+    // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
+    // ATS and GMMU page tables. For managed memory we need to prevent this
+    // parallel lookup since we would not get any GPU fault if the CPU has
+    // a valid mapping. Also, for external ranges that are known to be
+    // mapped entirely on the GMMU page table we can skip the ATS lookup
+    // for performance reasons. Parallel ATS lookup is disabled in PDE1
+    // (depth 3) and, therefore, it applies to the underlying 512MB VA
+    // range.
+    //
+    // UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
+    // This is fine because CUDA ensures that all managed and external
+    // allocations are properly compartmentalized in 512MB-aligned VA
+    // regions. For cudaHostRegister CUDA cannot control the VA range, but
+    // we rely on ATS for those allocations so they can't choose the
+    // ATS_NOT_ALLOWED mode.
+    // TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range to
+    //                    PTEs.
+    // HW complies with the leaf PDE's ATS_ALLOWED/ATS_NOT_ALLOWED settings,
+    // enabling us to treat any upper-level PDE as a don't care as long as there
+    // are leaf PDEs for the entire upper-level PDE range. We assume PDE4
+    // entries (depth == 0) are always ATS enabled, and the no_ats_range is in
+    // PDE3 or lower.
+    if (depth == 0 || (!valid && is_ats_range_valid(dir, child_index)))
+        return pcf[pde_type][ATS_ALLOWED];
+
+    return pcf[pde_type][ATS_NOT_ALLOWED];
+}
+
+static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
 {
    NvU64 pde_bits = 0;

@ -280,38 +386,17 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 dep
                break;
        }

-        // PCF (permission control flags) 5:3
-        // Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
-        // ATS and GMMU page tables. For managed memory we need to prevent this
-        // parallel lookup since we would not get any GPU fault if the CPU has
-        // a valid mapping. Also, for external ranges that are known to be
-        // mapped entirely on the GMMU page table we can skip the ATS lookup
-        // for performance reasons. Parallel ATS lookup is disabled in PDE1
-        // (depth 3) and, therefore, it applies to the underlying 512MB VA
-        // range.
-        //
-        // UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
-        // This is fine because CUDA ensures that all managed and external
-        // allocations are properly compartmentalized in 512MB-aligned VA
-        // regions. For cudaHostRegister CUDA cannot control the VA range, but
-        // we rely on ATS for those allocations so they can't choose the
-        // ATS_NOT_ALLOWED mode.
-        //
-        // TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
-        // to PTEs.
-        if (depth == 3 && g_uvm_global.ats.enabled)
-            pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
-        else
-            pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
-
        // address 51:12
        pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
    }

+    // PCF (permission control flags) 5:3
+    pde_bits |= HWVALUE64(_MMU_VER3, PDE, PCF, pde_pcf(phys_alloc != NULL, PDE_TYPE_SINGLE, dir, child_index));
+
    return pde_bits;
 }

-static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
+static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
 {
    NvU64 pde_bits = 0;

@ -330,17 +415,20 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
                break;
        }

-        // PCF (permission control flags) 5:3
-        pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
-
        // address 51:8
        pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
    }

+    // PCF (permission control flags) 5:3
+    pde_bits |= HWVALUE64(_MMU_VER3,
+                          DUAL_PDE,
+                          PCF_BIG,
+                          pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_BIG, dir, child_index));
+
    return pde_bits;
 }

-static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
+static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
 {
    NvU64 pde_bits = 0;

@ -359,32 +447,40 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
                break;
        }

-        // PCF (permission control flags) 69:67 [5:3]
-        pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
-
        // address 115:76 [51:12]
        pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
    }
+
+    // PCF (permission control flags) 69:67 [5:3]
+    pde_bits |= HWVALUE64(_MMU_VER3,
+                          DUAL_PDE,
+                          PCF_SMALL,
+                          pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_SMALL, dir, child_index));
+
    return pde_bits;
 }

 static void make_pde_hopper(void *entry,
                            uvm_mmu_page_table_alloc_t **phys_allocs,
-                            NvU32 depth,
-                            uvm_page_directory_t *child_dir)
+                            uvm_page_directory_t *dir,
+                            NvU32 child_index)
 {
-    NvU32 entry_count = entries_per_index_hopper(depth);
+    NvU32 entry_count;
    NvU64 *entry_bits = (NvU64 *)entry;

+    UVM_ASSERT(dir);
+
+    entry_count = entries_per_index_hopper(dir->depth);
+
    if (entry_count == 1) {
-        *entry_bits = single_pde_hopper(*phys_allocs, depth);
+        *entry_bits = single_pde_hopper(*phys_allocs, dir, child_index);
    }
    else if (entry_count == 2) {
-        entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
-        entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
+        entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG], dir, child_index);
+        entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL], dir, child_index);

        // This entry applies to the whole dual PDE but is stored in the lower
-        // bits
+        // bits.
        entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
    }
    else {
--- a/kernel-open/nvidia-uvm/uvm_linux.h
+++ b/kernel-open/nvidia-uvm/uvm_linux.h
@ -128,8 +128,9 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
 // present if we see the callback.
 //
 // The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
-// v3.19 (2014-11-13).
-    #if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
+// v3.19 (2014-11-13) and renamed in commit 1af5a8109904.
+    #if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE) || \
+        defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
        #define UVM_CAN_USE_MMU_NOTIFIERS() 1
    #else
        #define UVM_CAN_USE_MMU_NOTIFIERS() 0
--- a/kernel-open/nvidia-uvm/uvm_maxwell.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell.c
@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2021 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -71,4 +71,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->smc.supported = false;

    parent_gpu->plc_supported = false;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_maxwell_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_maxwell_mmu.c
@ -108,11 +108,14 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)

 static void make_pde_maxwell(void *entry,
                             uvm_mmu_page_table_alloc_t **phys_allocs,
-                             NvU32 depth,
-                             uvm_page_directory_t *child_dir)
+                             uvm_page_directory_t *dir,
+                             NvU32 child_index)
 {
    NvU64 pde_bits = 0;
-    UVM_ASSERT(depth == 0);
+
+    UVM_ASSERT(dir);
+    UVM_ASSERT(dir->depth == 0);
+
    pde_bits |= HWCONST64(_MMU, PDE, SIZE, FULL);
    pde_bits |= big_half_pde_maxwell(phys_allocs[MMU_BIG]) | small_half_pde_maxwell(phys_allocs[MMU_SMALL]);

--- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
+++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.c
@ -672,14 +672,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
        .finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
    };

-    // WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
-    // invalidates on read-only to read-write upgrades
-    //
-    // This code path isn't used on GH180 but we need to maintain consistent
-    // behaviour on systems that do.
-    if (!vma_is_anonymous(args->vma))
-        return NV_WARN_NOTHING_TO_DO;
-
    ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
    if (ret < 0)
        return errno_to_nv_status(ret);
@ -693,24 +685,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
    if (ret < 0)
        return errno_to_nv_status(ret);

-    // TODO: Bug 2419180: support file-backed pages in migrate_vma, when
-    //       support for it is added to the Linux kernel
-    //
-    // A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
-    // page can't be migrated (eg. because it's a non-anonymous mapping). We
-    // need this side-effect for SMMU on GH180 to ensure any cached read-only
-    // entries are flushed from SMMU on permission upgrade.
-    //
-    // TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
-    // invalidates on read-only to read-write upgrades
-    //
-    // The above WAR doesn't work for HugeTLBfs mappings because
-    // migrate_vma_setup() will fail in that case.
-    if (!vma_is_anonymous(args->vma)) {
-        migrate_vma_finalize(args);
-        return NV_WARN_NOTHING_TO_DO;
-    }
-
    uvm_migrate_vma_alloc_and_copy(args, state);
    if (state->status == NV_OK) {
        migrate_vma_pages(args);
@ -862,6 +836,17 @@ static NV_STATUS migrate_pageable_vma_region(struct vm_area_struct *vma,
    return NV_OK;
 }

+NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
+{
+    uvm_va_space_t *va_space = uvm_va_space_get(filp);
+
+    uvm_va_space_down_write(va_space);
+    va_space->test.skip_migrate_vma = params->skip;
+    uvm_va_space_up_write(va_space);
+
+    return NV_OK;
+}
+
 static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
                                      unsigned long start,
                                      unsigned long outer,
@ -884,13 +869,12 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
    start = max(start, vma->vm_start);
    outer = min(outer, vma->vm_end);

-    // migrate_vma only supports anonymous VMAs. We check for those after
-    // calling migrate_vma_setup() to workaround Bug 4130089. We need to check
-    // for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
-    // error for those.
-    // TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
-    // invalidates on read-only to read-write upgrades
-    if (is_vm_hugetlb_page(vma))
+    if (va_space->test.skip_migrate_vma)
+        return NV_WARN_NOTHING_TO_DO;
+
+    // TODO: Bug 2419180: support file-backed pages in migrate_vma, when
+    //       support for it is added to the Linux kernel
+    if (!vma_is_anonymous(vma))
        return NV_WARN_NOTHING_TO_DO;

    if (uvm_processor_mask_empty(&va_space->registered_gpus))
@ -950,7 +934,9 @@ static NV_STATUS migrate_pageable(migrate_vma_state_t *state)
            bool touch = uvm_migrate_args->touch;
            uvm_populate_permissions_t populate_permissions = uvm_migrate_args->populate_permissions;

-            UVM_ASSERT(!vma_is_anonymous(vma) || uvm_processor_mask_empty(&va_space->registered_gpus));
+            UVM_ASSERT(va_space->test.skip_migrate_vma ||
+                       !vma_is_anonymous(vma) ||
+                       uvm_processor_mask_empty(&va_space->registered_gpus));

            // We can't use migrate_vma to move the pages as desired. Normally
            // this fallback path is supposed to populate the memory then inform
--- a/kernel-open/nvidia-uvm/uvm_migrate_pageable.h
+++ b/kernel-open/nvidia-uvm/uvm_migrate_pageable.h
@ -218,6 +218,9 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
 NV_STATUS uvm_migrate_pageable_init(void);

 void uvm_migrate_pageable_exit(void);
+
+NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp);
+
 #else // UVM_MIGRATE_VMA_SUPPORTED

 static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
@ -251,6 +254,10 @@ static void uvm_migrate_pageable_exit(void)
 {
 }

+static inline NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
+{
+    return NV_OK;
+}
 #endif // UVM_MIGRATE_VMA_SUPPORTED

 #endif
--- a/kernel-open/nvidia-uvm/uvm_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_mmu.c
@ -338,7 +338,7 @@ static void pde_fill_cpu(uvm_page_tree_t *tree,
    UVM_ASSERT(sizeof(pde_data) >= entry_size);

    for (i = 0; i < pde_count; i++) {
-        tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i]);
+        tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i);

        if (entry_size == sizeof(pde_data[0]))
            uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
@ -393,7 +393,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,

        uvm_push_inline_data_begin(push, &inline_data);
        for (j = 0; j < entry_count; j++) {
-            tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i + j]);
+            tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i + j);
            uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
        }
        inline_data_addr = uvm_push_inline_data_end(&inline_data);
@ -407,9 +407,7 @@ static void pde_fill_gpu(uvm_page_tree_t *tree,

 // pde_fill() populates pde_count PDE entries (starting at start_index) with
 // the same mapping, i.e., with the same physical address (phys_addr).
-// pde_fill() is optimized for pde_count == 1, which is the common case. The
-// map_remap() function is the only case where pde_count > 1, only used on GA100
-// GPUs for 512MB page size mappings.
+// pde_fill() is optimized for pde_count == 1, which is the common case.
 static void pde_fill(uvm_page_tree_t *tree,
                     uvm_page_directory_t *directory,
                     NvU32 start_index,
@ -428,21 +426,26 @@ static void pde_fill(uvm_page_tree_t *tree,
 static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
 {
    NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
+    NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;

    // Passing in NULL for the phys_allocs will mark the child entries as
    // invalid.
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};

    // Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
-    // than 512 entries. We initialize them all with the same clean PDE.
-    // Additionally, only ATS systems may require clean PDEs bit settings based
-    // on the mapping VA.
-    if (dir->depth == tree->hal->page_table_depth(page_size) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
+    // than 512 entries. In this case, we initialize them all with the same
+    // clean PDE. ATS systems may require clean PDEs with
+    // ATS_ALLOWED/ATS_NOT_ALLOWED bit settings based on the mapping VA.
+    // We only clean_bits to 0 at the lowest page table level (PTE table), i.e.,
+    // when depth is greater than the max_pde_depth.
+    if ((dir->depth > max_pde_depth) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
        NvU64 clear_bits[2];

        // If it is not a PTE, make a clean PDE.
        if (dir->depth != tree->hal->page_table_depth(page_size)) {
-            tree->hal->make_pde(clear_bits, phys_allocs, dir->depth, dir->entries[0]);
+            // make_pde() child index is zero/ignored, since it is only used in
+            // PDEs on ATS-enabled systems where pde_fill() is preferred.
+            tree->hal->make_pde(clear_bits, phys_allocs, dir, 0);

            // Make sure that using only clear_bits[0] will work.
            UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
@ -816,7 +819,6 @@ static void free_unused_directories(uvm_page_tree_t *tree,
            }
        }
    }
-
 }

 static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
@ -827,6 +829,86 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
    return phys_mem_allocate(tree, alloc_size, tree->location, UVM_PMM_ALLOC_FLAGS_EVICT, out);
 }

+static bool page_tree_ats_init_required(uvm_page_tree_t *tree)
+{
+    // We have full control of the kernel page tables mappings, no ATS address
+    // aliases is expected.
+    if (tree->type == UVM_PAGE_TREE_TYPE_KERNEL)
+        return false;
+
+    // Enable uvm_page_tree_init() from the page_tree test.
+    if (uvm_enable_builtin_tests && tree->gpu_va_space == NULL)
+        return false;
+
+    if (!tree->gpu_va_space->ats.enabled)
+        return false;
+
+    return tree->gpu->parent->no_ats_range_required;
+}
+
+static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
+{
+    NV_STATUS status;
+    NvU64 min_va_upper, max_va_lower;
+    NvU32 page_size;
+
+    if (!page_tree_ats_init_required(tree))
+        return NV_OK;
+
+    page_size = uvm_mmu_biggest_page_size(tree);
+
+    uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
+
+    // Potential violation of the UVM internal get/put_ptes contract. get_ptes()
+    // creates and initializes enough PTEs to populate all PDEs covering the
+    // no_ats_ranges. We store the no_ats_ranges in the tree, so they can be
+    // put_ptes()'ed on deinit(). It doesn't preclude the range to be used by a
+    // future get_ptes(), since we don't write to the PTEs (range->table) from
+    // the tree->no_ats_ranges.
+    //
+    // Lower half
+    status = uvm_page_tree_get_ptes(tree,
+                                    page_size,
+                                    max_va_lower,
+                                    page_size,
+                                    UVM_PMM_ALLOC_FLAGS_EVICT,
+                                    &tree->no_ats_ranges[0]);
+    if (status != NV_OK)
+        return status;
+
+    UVM_ASSERT(tree->no_ats_ranges[0].entry_count == 1);
+
+    if (uvm_platform_uses_canonical_form_address()) {
+        // Upper half
+        status = uvm_page_tree_get_ptes(tree,
+                                        page_size,
+                                        min_va_upper - page_size,
+                                        page_size,
+                                        UVM_PMM_ALLOC_FLAGS_EVICT,
+                                        &tree->no_ats_ranges[1]);
+        if (status != NV_OK)
+            return status;
+
+        UVM_ASSERT(tree->no_ats_ranges[1].entry_count == 1);
+    }
+
+    return NV_OK;
+}
+
+static void page_tree_ats_deinit(uvm_page_tree_t *tree)
+{
+    size_t i;
+
+    if (page_tree_ats_init_required(tree)) {
+        for (i = 0; i < ARRAY_SIZE(tree->no_ats_ranges); i++) {
+            if (tree->no_ats_ranges[i].entry_count)
+                uvm_page_tree_put_ptes(tree, &tree->no_ats_ranges[i]);
+        }
+
+        memset(tree->no_ats_ranges, 0, sizeof(tree->no_ats_ranges));
+    }
+}
+
 static void map_remap_deinit(uvm_page_tree_t *tree)
 {
    if (tree->map_remap.pde0) {
@ -1032,11 +1114,22 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
        return status;

    phys_mem_init(tree, UVM_PAGE_SIZE_AGNOSTIC, tree->root, &push);
-    return page_tree_end_and_wait(tree, &push);
+
+    status = page_tree_end_and_wait(tree, &push);
+    if (status != NV_OK)
+        return status;
+
+    status = page_tree_ats_init(tree);
+    if (status != NV_OK)
+        return status;
+
+    return NV_OK;
 }

 void uvm_page_tree_deinit(uvm_page_tree_t *tree)
 {
+    page_tree_ats_deinit(tree);
+
    UVM_ASSERT(tree->root->ref_count == 0);

    // Take the tree lock only to avoid assertions. It is not required for
@ -1275,7 +1368,6 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
        UVM_ASSERT(uvm_gpu_can_address_kernel(tree->gpu, start, size));

    while (true) {
-
        // index of the entry, for the first byte of the range, within its
        // containing directory
        NvU32 start_index;
@ -1307,7 +1399,8 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
                if (dir_cache[dir->depth] == NULL) {
                    *cur_depth = dir->depth;

-                    // Undo the changes to the tree so that the dir cache remains private to the thread
+                    // Undo the changes to the tree so that the dir cache
+                    // remains private to the thread.
                    for (i = 0; i < used_count; i++)
                        host_pde_clear(tree, dirs_used[i]->host_parent, dirs_used[i]->index_in_parent, page_size);

@ -1405,7 +1498,8 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
                                  dir_cache)) == NV_ERR_MORE_PROCESSING_REQUIRED) {
        uvm_mutex_unlock(&tree->lock);

-        // try_get_ptes never needs depth 0, so store a directory at its parent's depth
+        // try_get_ptes never needs depth 0, so store a directory at its
+        // parent's depth.
        // TODO: Bug 1766655: Allocate everything below cur_depth instead of
        //       retrying for every level.
        dir_cache[cur_depth] = allocate_directory(tree, page_size, cur_depth + 1, pmm_flags);
@ -1688,8 +1782,12 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
                                              range);
        if (status != NV_OK) {
            UVM_ERR_PRINT("Failed to get PTEs for subrange %zd [0x%llx, 0x%llx) size 0x%llx, part of [0x%llx, 0x%llx)\n",
-                    i, range_start, range_start + range_size, range_size,
-                    start, size);
+                          i,
+                          range_start,
+                          range_start + range_size,
+                          range_size,
+                          start,
+                          size);
            goto out;
        }
    }
--- a/kernel-open/nvidia-uvm/uvm_mmu.h
+++ b/kernel-open/nvidia-uvm/uvm_mmu.h
@ -215,11 +215,14 @@ struct uvm_mmu_mode_hal_struct
    // memory out-of-range error so we can immediately identify bad PTE usage.
    NvU64 (*poisoned_pte)(void);

-    // write a PDE bit-pattern to entry based on the data in entries (which may
+    // Write a PDE bit-pattern to entry based on the data in allocs (which may
    // point to two items for dual PDEs).
-    // any of allocs are allowed to be NULL, in which case they are to be
-    // treated as empty.
-    void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth, uvm_page_directory_t *child_dir);
+    // Any of allocs are allowed to be NULL, in which case they are to be
+    // treated as empty. make_pde() uses dir and child_index to compute the
+    // mapping PDE VA. On ATS-enabled systems, we may set PDE's PCF as
+    // ATS_ALLOWED or ATS_NOT_ALLOWED based on the mapping PDE VA, even for
+    // invalid/clean PDE entries.
+    void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);

    // size of an entry in a directory/table.  Generally either 8 or 16 bytes.
    // (in the case of Pascal dual PDEs)
@ -299,6 +302,12 @@ struct uvm_page_tree_struct
        uvm_page_directory_t *pde0;
    } map_remap;

+    // On ATS-enabled systems where the CPU VA width is smaller than the GPU VA
+    // width, the excess address range is set with ATS_NOT_ALLOWED on all  leaf
+    // PDEs covering that range. We have at most 2 no_ats_ranges, due to
+    // canonical form address systems.
+    uvm_page_table_range_t no_ats_ranges[2];
+
    // Tracker for all GPU operations on the tree
    uvm_tracker_t tracker;
 };
--- a/kernel-open/nvidia-uvm/uvm_page_tree_test.c
+++ b/kernel-open/nvidia-uvm/uvm_page_tree_test.c
@ -1578,25 +1578,28 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
    uvm_mmu_mode_hal_t *hal;
+    uvm_page_directory_t dir;
    NvU32 i, j, big_page_size, page_size;

+    dir.depth = 0;
+
    for (i = 0; i < ARRAY_SIZE(big_page_sizes); i++) {
        big_page_size = big_page_sizes[i];
        hal = gpu->parent->arch_hal->mmu_mode_hal(big_page_size);

        memset(phys_allocs, 0, sizeof(phys_allocs));

-        hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
+        hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
        TEST_CHECK_RET(pde_bits == 0x0L);

        phys_allocs[0] = &alloc_sys;
        phys_allocs[1] = &alloc_vid;
-        hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
+        hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
        TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);

        phys_allocs[0] = &alloc_vid;
        phys_allocs[1] = &alloc_sys;
-        hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
+        hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
        TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);

        for (j = 0; j <= 2; j++) {
@ -1666,6 +1669,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
+    uvm_page_directory_t dir;

    // big versions have [11:8] set as well to test the page table merging
    uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
@ -1673,32 +1677,39 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent

    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);

+    dir.index_in_parent = 0;
+    dir.host_parent = NULL;
+    dir.depth = 0;
+
    // Make sure cleared PDEs work as expected
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0);

    memset(pde_bits, 0xFF, sizeof(pde_bits));
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    dir.depth = 3;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);

    // Sys and vidmem PDEs
    phys_allocs[0] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    dir.depth = 0;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);

    phys_allocs[0] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);

    // Dual PDEs
    phys_allocs[0] = &alloc_big_sys;
    phys_allocs[1] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    dir.depth = 3;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);

    phys_allocs[0] = &alloc_big_vid;
    phys_allocs[1] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);

    // uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
@ -1754,6 +1765,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
+    uvm_page_directory_t dir;

    // big versions have [11:8] set as well to test the page table merging
    uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
@ -1761,37 +1773,45 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr

    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);

+    dir.index_in_parent = 0;
+    dir.host_parent = NULL;
+    dir.depth = 0;
+
    // Make sure cleared PDEs work as expected
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0);

    memset(pde_bits, 0xFF, sizeof(pde_bits));
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    dir.depth = 3;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);

    // Sys and vidmem PDEs
    phys_allocs[0] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    dir.depth = 0;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);

    phys_allocs[0] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);

    // Dual PDEs
    phys_allocs[0] = &alloc_big_sys;
    phys_allocs[1] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    dir.depth = 3;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);

    phys_allocs[0] = &alloc_big_vid;
    phys_allocs[1] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 3, NULL);
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);

    // NO_ATS PDE1 (depth 2)
    phys_allocs[0] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 2, NULL);
+    dir.depth = 2;
+    hal->make_pde(pde_bits, phys_allocs, &dir, 0);
    if (g_uvm_global.ats.enabled)
        TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
    else
@ -1826,104 +1846,203 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent

 static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
 {
+    NV_STATUS status = NV_OK;
    NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
    NvU64 pde_bits[2];
+    uvm_page_directory_t *dirs[5];
    size_t i, num_page_sizes;
    uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
    uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
    uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);

-    // big versions have [11:8] set as well to test the page table merging
+    // Big versions have [11:8] set as well to test the page table merging
    uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
    uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBBB00LL);

    uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);

-    // Make sure cleared PDEs work as expected
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0);
+    memset(dirs, 0, sizeof(dirs));
+    // Fake directory tree.
+    for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+        dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
+        TEST_CHECK_GOTO(dirs[i] != NULL, cleanup);
+
+        dirs[i]->depth = i;
+        dirs[i]->index_in_parent = 0;
+
+        if (i == 0)
+            dirs[i]->host_parent = NULL;
+        else
+            dirs[i]->host_parent = dirs[i - 1];
+    }
+
+    // Make sure cleared PDEs work as expected.
+    hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
+    TEST_CHECK_GOTO(pde_bits[0] == 0, cleanup);

    // Cleared PDEs work as expected for big and small PDEs.
    memset(pde_bits, 0xFF, sizeof(pde_bits));
-    hal->make_pde(pde_bits, phys_allocs, 4, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
+    hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
+    TEST_CHECK_GOTO(pde_bits[0] == 0 && pde_bits[1] == 0, cleanup);

    // Sys and vidmem PDEs, uncached ATS allowed.
    phys_allocs[0] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
+    hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
+    TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);

    phys_allocs[0] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 0, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
+    hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
+    TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBB00A, cleanup);

-    // Dual PDEs, uncached.
+    // Dual PDEs, uncached. We don't use child_dir in the depth 4 checks because
+    // our policy decides the PDE's PCF without using it.
    phys_allocs[0] = &alloc_big_sys;
    phys_allocs[1] = &alloc_vid;
-    hal->make_pde(pde_bits, phys_allocs, 4, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
+    hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
+    if (g_uvm_global.ats.enabled)
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A, cleanup);
+    else
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999990C && pde_bits[1] == 0xBBBBBBB00A, cleanup);

    phys_allocs[0] = &alloc_big_vid;
    phys_allocs[1] = &alloc_sys;
-    hal->make_pde(pde_bits, phys_allocs, 4, NULL);
-    TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
+    hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
+    if (g_uvm_global.ats.enabled)
+        TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C, cleanup);
+    else
+        TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB0A && pde_bits[1] == 0x999999999900C, cleanup);
+
+    // We only need to test make_pde() on ATS when the CPU VA width < GPU's.
+    if (g_uvm_global.ats.enabled && uvm_cpu_num_va_bits() < hal->num_va_bits()) {
+        phys_allocs[0] = &alloc_sys;
+
+        dirs[1]->index_in_parent = 0;
+        hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
+
+        dirs[2]->index_in_parent = 0;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[2]->index_in_parent = 1;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 1);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[2]->index_in_parent = 2;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[2]->index_in_parent = 511;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 511);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[1]->index_in_parent = 1;
+        hal->make_pde(pde_bits, phys_allocs, dirs[0], 1);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
+
+        dirs[2]->index_in_parent = 0;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[2]->index_in_parent = 509;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        dirs[2]->index_in_parent = 510;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
+
+        phys_allocs[0] = NULL;
+
+        dirs[1]->index_in_parent = 0;
+        hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
+
+        dirs[2]->index_in_parent = 0;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
+
+        dirs[2]->index_in_parent = 2;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
+
+        dirs[1]->index_in_parent = 1;
+        dirs[2]->index_in_parent = 509;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
+
+        dirs[2]->index_in_parent = 510;
+        hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
+        TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
+    }

    // uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
    // access counters disabled.
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
-                                 0x9999999999000LL,
-                                 UVM_PROT_READ_WRITE_ATOMIC,
-                                 UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
+                                  0x9999999999000LL,
+                                  UVM_PROT_READ_WRITE_ATOMIC,
+                                  UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D,
+                    cleanup);

    // change to cached.
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
-                                 0x9999999999000LL,
-                                 UVM_PROT_READ_WRITE_ATOMIC,
-                                 UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
-                   0x9999999999685);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
+                                  0x9999999999000LL,
+                                  UVM_PROT_READ_WRITE_ATOMIC,
+                                  UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
+                                  0x9999999999685,
+                    cleanup);

    // enable access counters.
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
-                                 0x9999999999000LL,
-                                 UVM_PROT_READ_WRITE_ATOMIC,
-                                 UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
+                                  0x9999999999000LL,
+                                  UVM_PROT_READ_WRITE_ATOMIC,
+                                  UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605,
+                    cleanup);

    // remove atomic
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
-                                 0x9999999999000LL,
-                                 UVM_PROT_READ_WRITE,
-                                 UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
+                                  0x9999999999000LL,
+                                  UVM_PROT_READ_WRITE,
+                                  UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645,
+                    cleanup);

    // read only
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
-                                 0x9999999999000LL,
-                                 UVM_PROT_READ_ONLY,
-                                 UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
+                                  0x9999999999000LL,
+                                  UVM_PROT_READ_ONLY,
+                                  UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665,
+                    cleanup);

    // local video
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_VID,
-                                 0xBBBBBBB000LL,
-                                 UVM_PROT_READ_ONLY,
-                                 UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_VID,
+                                  0xBBBBBBB000LL,
+                                  UVM_PROT_READ_ONLY,
+                                  UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661,
+                    cleanup);

    // peer 1
-    TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_PEER_1,
-                                 0xBBBBBBB000LL,
-                                 UVM_PROT_READ_ONLY,
-                                 UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663);
+    TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_PEER_1,
+                                  0xBBBBBBB000LL,
+                                  UVM_PROT_READ_ONLY,
+                                  UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663,
+                    cleanup);

    // sparse
-    TEST_CHECK_RET(hal->make_sparse_pte() == 0x8);
+    TEST_CHECK_GOTO(hal->make_sparse_pte() == 0x8, cleanup);

    // sked reflected
-    TEST_CHECK_RET(hal->make_sked_reflected_pte() == 0xF09);
+    TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF09, cleanup);

    num_page_sizes = get_page_sizes(gpu, page_sizes);

    for (i = 0; i < num_page_sizes; i++)
-        TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
+        TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);

-    return NV_OK;
+cleanup:
+    for (i = 0; i < ARRAY_SIZE(dirs); i++)
+        uvm_kvfree(dirs[i]);
+
+    return status;
 }

 static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
@ -2347,7 +2466,13 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
    // calls.
    TEST_NV_CHECK_GOTO(fake_tlb_invals_alloc(), done);

-    TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
+    // We prevent the maxwell_test_page_tree test from running on ATS-enabled
+    // systems. On "fake" Maxwell-based ATS systems pde_fill() may push more
+    // methods than what we support in UVM. Specifically, on
+    // uvm_page_tree_init() which eventually calls phys_mem_init(). On Maxwell,
+    // upper PDE levels have more than 512 entries.
+    if (!g_uvm_global.ats.enabled)
+        TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
    TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
--- a/kernel-open/nvidia-uvm/uvm_pascal.c
+++ b/kernel-open/nvidia-uvm/uvm_pascal.c
@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2020 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -100,4 +100,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->smc.supported = false;

    parent_gpu->plc_supported = false;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_pascal_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_pascal_mmu.c
@ -142,12 +142,16 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)

 static void make_pde_pascal(void *entry,
                            uvm_mmu_page_table_alloc_t **phys_allocs,
-                            NvU32 depth,
-                            uvm_page_directory_t *child_dir)
+                            uvm_page_directory_t *dir,
+                            NvU32 child_index)
 {
-    NvU32 entry_count = entries_per_index_pascal(depth);
+    NvU32 entry_count;
    NvU64 *entry_bits = (NvU64 *)entry;

+    UVM_ASSERT(dir);
+
+    entry_count = entries_per_index_pascal(dir->depth);
+
    if (entry_count == 1) {
        *entry_bits = single_pde_pascal(*phys_allocs);
    }
@ -155,7 +159,8 @@ static void make_pde_pascal(void *entry,
        entry_bits[MMU_BIG] = big_half_pde_pascal(phys_allocs[MMU_BIG]);
        entry_bits[MMU_SMALL] = small_half_pde_pascal(phys_allocs[MMU_SMALL]);

-        // This entry applies to the whole dual PDE but is stored in the lower bits
+        // This entry applies to the whole dual PDE but is stored in the lower
+        // bits.
        entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER2, DUAL_PDE, IS_PDE, TRUE);
    }
    else {
--- a/kernel-open/nvidia-uvm/uvm_test.c
+++ b/kernel-open/nvidia-uvm/uvm_test.c
@ -36,6 +36,7 @@
 #include "uvm_mmu.h"
 #include "uvm_gpu_access_counters.h"
 #include "uvm_pmm_sysmem.h"
+#include "uvm_migrate_pageable.h"

 static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
 {
@ -331,6 +332,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
        UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
        UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CPU_CHUNK_API, uvm_test_cpu_chunk_api);
+        UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SKIP_MIGRATE_VMA, uvm_test_skip_migrate_vma);
    }

    return -EINVAL;
--- a/kernel-open/nvidia-uvm/uvm_test_ioctl.h
+++ b/kernel-open/nvidia-uvm/uvm_test_ioctl.h
@ -28,6 +28,13 @@
 #include "uvm_ioctl.h"
 #include "nv_uvm_types.h"

+#define UVM_TEST_SKIP_MIGRATE_VMA                        UVM_TEST_IOCTL_BASE(103)
+typedef struct
+{
+    NvBool skip;                                         // In
+    NV_STATUS rmStatus;                                  // Out
+} UVM_TEST_SKIP_MIGRATE_VMA_PARAMS;
+
 #ifdef __cplusplus
 extern "C" {
 #endif
--- a/kernel-open/nvidia-uvm/uvm_tools.c
+++ b/kernel-open/nvidia-uvm/uvm_tools.c
@ -1082,25 +1082,19 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
 }


-void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
-                                     NvU32 batch_id,
-                                     uvm_fault_client_type_t client_type)
+void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
 {
    UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);

    if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
        return;

-    record_replay_event_helper(gpu->id,
-                               batch_id,
-                               client_type,
-                               NV_GETTIME(),
-                               gpu->parent->host_hal->get_time(gpu));
+    record_replay_event_helper(gpu->id, batch_id, client_type, NV_GETTIME(), gpu->parent->host_hal->get_time(gpu));
 }

 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
                                        const uvm_access_counter_buffer_entry_t *buffer_entry,
-                                        bool on_managed)
+                                        bool on_managed_phys)
 {
    UvmEventEntry entry;
    UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
@ -1119,6 +1113,7 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
    info->srcIndex            = uvm_id_value(gpu->id);
    info->address             = buffer_entry->address.address;
    info->isVirtual           = buffer_entry->address.is_virtual? 1: 0;
+
    if (buffer_entry->address.is_virtual) {
        info->instancePtr         = buffer_entry->virtual_info.instance_ptr.address;
        info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
@ -1126,9 +1121,10 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
    }
    else {
        info->aperture            = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
+        info->physOnManaged       = on_managed_phys? 1 : 0;
    }
+
    info->isFromCpu           = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
-    info->onManaged           = on_managed? 1 : 0;
    info->value               = buffer_entry->counter_value;
    info->subGranularity      = buffer_entry->sub_granularity;
    info->bank                = buffer_entry->bank;
--- a/kernel-open/nvidia-uvm/uvm_tools.h
+++ b/kernel-open/nvidia-uvm/uvm_tools.h
@ -102,18 +102,13 @@ void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
                                                uvm_va_block_region_t region,
                                                const uvm_page_mask_t *page_mask);

-void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
-                                uvm_push_t *push,
-                                NvU32 batch_id,
-                                uvm_fault_client_type_t client_type);
+void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id, uvm_fault_client_type_t client_type);

-void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
-                                     NvU32 batch_id,
-                                     uvm_fault_client_type_t client_type);
+void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);

 void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
                                        const uvm_access_counter_buffer_entry_t *buffer_entry,
-                                        bool on_managed);
+                                        bool on_managed_phys);

 void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

--- a/kernel-open/nvidia-uvm/uvm_turing.c
+++ b/kernel-open/nvidia-uvm/uvm_turing.c
@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2017-2021 NVIDIA Corporation
+    Copyright (c) 2017-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -93,4 +93,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->smc.supported = false;

    parent_gpu->plc_supported = true;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_types.h
+++ b/kernel-open/nvidia-uvm/uvm_types.h
@ -967,8 +967,10 @@ typedef struct
    NvU8 isFromCpu;

    NvU8 veId;
-    NvU8 onManaged;         // The access counter notification was triggered on
-                            // a managed memory region
+
+    // The physical access counter notification was triggered on a managed
+    // memory region. This is not set for virtual access counter notifications.
+    NvU8 physOnManaged;

    NvU32 value;
    NvU32 subGranularity;
--- a/kernel-open/nvidia-uvm/uvm_va_block.c
+++ b/kernel-open/nvidia-uvm/uvm_va_block.c
@ -1760,6 +1760,21 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
    return (NvU32)chunk_size;
 }

+NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
+                                     uvm_processor_id_t processor,
+                                     uvm_page_index_t page_index)
+{
+    block_phys_page_t page;
+
+    UVM_ASSERT(block);
+
+    uvm_assert_mutex_locked(&block->lock);
+
+    page = block_phys_page(processor, page_index);
+
+    return block_phys_page_size(block, page);
+}
+
 static uvm_pte_bits_cpu_t get_cpu_pte_bit_index(uvm_prot_t prot)
 {
    uvm_pte_bits_cpu_t pte_bit_index = UVM_PTE_BITS_CPU_MAX;
@ -8248,14 +8263,6 @@ void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
    event_data.block_munmap.region = region;
    uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_BLOCK_MUNMAP, &event_data);

-    // Set a flag so that GPU fault events are flushed since they might refer
-    // to the region being unmapped.
-    // Note that holding the va_block lock prevents GPU VA spaces from
-    // being removed so the registered_gpu_va_spaces mask is stable.
-    for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
-        uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
-    }
-
    // Release any remaining vidmem chunks in the given region.
    for_each_gpu_id(gpu_id) {
        uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
@ -10172,7 +10179,11 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
    // The logic that's used to detect remote faulting also keeps memory in place for
    // ptrace accesses. We would prefer to control those policies separately, but the
    // NIC case takes priority.
+    // If the accessing processor is CPU, we're either handling a fault
+    // from other than owning process, or we're handling an MOMC
+    // notification. Only prevent migration for the former.
    if (UVM_ID_IS_CPU(processor_id) &&
+        operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&        
        uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
        va_block_context->mm != current->mm) {
        UVM_ASSERT(va_block_context->mm != NULL);
--- a/kernel-open/nvidia-uvm/uvm_va_block.h
+++ b/kernel-open/nvidia-uvm/uvm_va_block.h
@ -2072,6 +2072,14 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
 // Locking: The va_block lock must be held.
 void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);

+// Get the size of the physical allocation backing the page at page_index on the
+// specified processor in the block. Returns 0 if the address is not resident on
+// the specified processor.
+// Locking: The va_block lock must be held.
+NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
+                                     uvm_processor_id_t processor,
+                                     uvm_page_index_t page_index);
+
 // Get CPU page size or 0 if it is not mapped
 NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
                                 uvm_page_index_t page_index);
--- a/kernel-open/nvidia-uvm/uvm_va_space.c
+++ b/kernel-open/nvidia-uvm/uvm_va_space.c
@ -1540,7 +1540,6 @@ static void remove_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space,
    atomic_inc(&va_space->gpu_va_space_deferred_free.num_pending);

    uvm_processor_mask_clear(&va_space->registered_gpu_va_spaces, gpu_va_space->gpu->id);
-    uvm_processor_mask_clear_atomic(&va_space->needs_fault_buffer_flush, gpu_va_space->gpu->id);
    va_space->gpu_va_spaces[uvm_id_gpu_index(gpu_va_space->gpu->id)] = NULL;
    gpu_va_space->state = UVM_GPU_VA_SPACE_STATE_DEAD;
 }
--- a/kernel-open/nvidia-uvm/uvm_va_space.h
+++ b/kernel-open/nvidia-uvm/uvm_va_space.h
@ -253,17 +253,6 @@ struct uvm_va_space_struct
    // corrupting state.
    uvm_processor_mask_t gpu_unregister_in_progress;

-    // On VMA destruction, the fault buffer needs to be flushed for all the GPUs
-    // registered in the VA space to avoid leaving stale entries of the VA range
-    // that is going to be destroyed. Otherwise, these fault entries can be
-    // attributed to new VA ranges reallocated at the same addresses. However,
-    // uvm_vm_close is called with mm->mmap_lock taken and we cannot take the
-    // ISR lock. Therefore, we use a flag to notify the GPU fault handler that
-    // the fault buffer needs to be flushed, before servicing the faults that
-    // belong to the va_space. The bits are set and cleared atomically so no
-    // va_space lock is required.
-    uvm_processor_mask_t needs_fault_buffer_flush;
-
    // Mask of processors that are participating in system-wide atomics
    uvm_processor_mask_t system_wide_atomics_enabled_processors;

@ -353,6 +342,7 @@ struct uvm_va_space_struct
    struct
    {
        bool  page_prefetch_enabled;
+        bool  skip_migrate_vma;

        atomic_t migrate_vma_allocation_fail_nth;

--- a/kernel-open/nvidia-uvm/uvm_va_space_mm.c
+++ b/kernel-open/nvidia-uvm/uvm_va_space_mm.c
@ -215,7 +215,13 @@ bool uvm_va_space_mm_enabled(uvm_va_space_t *va_space)

    static struct mmu_notifier_ops uvm_mmu_notifier_ops_ats =
    {
+#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
        .invalidate_range = uvm_mmu_notifier_invalidate_range_ats,
+#elif defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
+        .arch_invalidate_secondary_tlbs = uvm_mmu_notifier_invalidate_range_ats,
+#else
+        #error One of invalidate_range/arch_invalid_secondary must be present
+#endif
    };

    static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)
--- a/kernel-open/nvidia-uvm/uvm_volta.c
+++ b/kernel-open/nvidia-uvm/uvm_volta.c
@ -1,5 +1,5 @@
 /*******************************************************************************
-    Copyright (c) 2016-2021 NVIDIA Corporation
+    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -98,4 +98,6 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
    parent_gpu->smc.supported = false;

    parent_gpu->plc_supported = false;
+
+    parent_gpu->no_ats_range_required = false;
 }
--- a/kernel-open/nvidia-uvm/uvm_volta_mmu.c
+++ b/kernel-open/nvidia-uvm/uvm_volta_mmu.c
@ -147,14 +147,18 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)

 static void make_pde_volta(void *entry,
                           uvm_mmu_page_table_alloc_t **phys_allocs,
-                           NvU32 depth,
-                           uvm_page_directory_t *child_dir)
+                           uvm_page_directory_t *dir,
+                           NvU32 child_index)
 {
-    NvU32 entry_count = entries_per_index_volta(depth);
+    NvU32 entry_count;
    NvU64 *entry_bits = (NvU64 *)entry;

+    UVM_ASSERT(dir);
+
+    entry_count = entries_per_index_volta(dir->depth);
+
    if (entry_count == 1) {
-        *entry_bits = single_pde_volta(*phys_allocs, depth);
+        *entry_bits = single_pde_volta(*phys_allocs, dir->depth);
    }
    else if (entry_count == 2) {
        entry_bits[MMU_BIG] = big_half_pde_volta(phys_allocs[MMU_BIG]);
--- a/kernel-open/nvidia/nv-msi.c
+++ b/kernel-open/nvidia/nv-msi.c
@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
        {
            for( j = 0; j < i; j++)
            {
-                free_irq(nvl->msix_entries[i].vector, (void *)nvl);
+                free_irq(nvl->msix_entries[j].vector, (void *)nvl);
            }
            break;
        }
--- a/kernel-open/nvidia/nv-p2p.c
+++ b/kernel-open/nvidia/nv-p2p.c
@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
    (*page_table)->page_size = page_size_index;

    os_free_mem(physical_addresses);
+    physical_addresses = NULL;
+
    os_free_mem(wreqmb_h);
+    wreqmb_h = NULL;
+
    os_free_mem(rreqmb_h);
+    rreqmb_h = NULL;

    if (free_callback != NULL)
    {
--- a/src/common/inc/nvBldVer.h
+++ b/src/common/inc/nvBldVer.h
@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-    #define NV_BUILD_BRANCH             r537_41
+    #define NV_BUILD_BRANCH             r537_68
 #endif
 #ifndef NV_PUBLIC_BRANCH
-    #define NV_PUBLIC_BRANCH             r537_41
+    #define NV_PUBLIC_BRANCH             r537_68
 #endif

 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r535/r537_41-286"
-#define NV_BUILD_CHANGELIST_NUM         (33292694)
+#define NV_BUILD_BRANCH_VERSION         "rel/gpu_drv/r535/r537_68-335"
+#define NV_BUILD_CHANGELIST_NUM         (33430121)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "rel/gpu_drv/r535/r537_41-286"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
+#define NV_BUILD_NAME                   "rel/gpu_drv/r535/r537_68-335"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33430121)

 #else     /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION         "r537_41-1"
-#define NV_BUILD_CHANGELIST_NUM         (33292694)
+#define NV_BUILD_BRANCH_VERSION         "r537_68-2"
+#define NV_BUILD_CHANGELIST_NUM         (33425293)
 #define NV_BUILD_TYPE                   "Official"
-#define NV_BUILD_NAME                   "537.42"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
+#define NV_BUILD_NAME                   "537.70"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33425293)
 #define NV_BUILD_BRANCH_BASE_VERSION    R535
 #endif
 // End buildmeister python edited section
--- a/src/common/inc/nvUnixVersion.h
+++ b/src/common/inc/nvUnixVersion.h
@ -4,7 +4,7 @@
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
    (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

-#define NV_VERSION_STRING               "535.113.01"
+#define NV_VERSION_STRING               "535.129.03"

 #else

--- a/src/common/inc/swref/published/ampere/ga100/hwproject.h
+++ b/src/common/inc/swref/published/ampere/ga100/hwproject.h
@ -0,0 +1,28 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __ga100_hwproject_h__
+#define __ga100_hwproject_h__
+
+#define NV_SCAL_LITTER_NUM_FBPAS                       24
+
+#endif // __ga100_hwproject_h__
--- a/src/common/inc/swref/published/hopper/gh100/dev_fb.h
+++ b/src/common/inc/swref/published/hopper/gh100/dev_fb.h
@ -30,24 +30,4 @@
 #define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR             31:0 /* RWIVF */
 #define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK  0x000FFFFF /* ----V */

-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E78 /* RW-4R */
-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E78 /* RW-4R */
-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
-#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
-
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E8C /* RW-4R */
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E8C /* RW-4R */
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
-#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
-
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT               0x00100EA0 /* RW-4R */
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT               0x00100EA0 /* RW-4R */
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
-#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
 #endif // __gh100_dev_fb_h_
--- a/src/common/inc/swref/published/hopper/gh100/dev_gsp.h
+++ b/src/common/inc/swref/published/hopper/gh100/dev_gsp.h
@ -31,4 +31,22 @@
 #define NV_PGSP_FALCON_ENGINE_RESET_STATUS_ASSERTED                                                      0x00000000     /* R-E-V */
 #define NV_PGSP_FALCON_ENGINE_RESET_STATUS_DEASSERTED                                                    0x00000002     /* R---V */
 #define NV_PGSP_MAILBOX(i)                                                                               (0x110804+(i)*4) /* RW-4A */
+#define NV_PGSP_EMEMC(i)                                                                                 (0x110ac0+(i)*8) /* RW-4A */
+#define NV_PGSP_EMEMC__SIZE_1                                                                            8              /*       */
+#define NV_PGSP_EMEMC_OFFS                                                                               7:2            /* RWIVF */
+#define NV_PGSP_EMEMC_OFFS_INIT                                                                          0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_BLK                                                                                15:8           /* RWIVF */
+#define NV_PGSP_EMEMC_BLK_INIT                                                                           0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCW                                                                              24:24          /* RWIVF */
+#define NV_PGSP_EMEMC_AINCW_INIT                                                                         0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCW_TRUE                                                                         0x00000001     /* RW--V */
+#define NV_PGSP_EMEMC_AINCW_FALSE                                                                        0x00000000     /* RW--V */
+#define NV_PGSP_EMEMC_AINCR                                                                              25:25          /* RWIVF */
+#define NV_PGSP_EMEMC_AINCR_INIT                                                                         0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCR_TRUE                                                                         0x00000001     /* RW--V */
+#define NV_PGSP_EMEMC_AINCR_FALSE                                                                        0x00000000     /* RW--V */
+#define NV_PGSP_EMEMD(i)                                                                                 (0x110ac4+(i)*8) /* RW-4A */
+#define NV_PGSP_EMEMD__SIZE_1                                                                            8              /*       */
+#define NV_PGSP_EMEMD_DATA                                                                               31:0           /* RWXVF */
+
 #endif // __gh100_dev_gsp_h__
--- a/src/common/inc/swref/published/hopper/gh100/hwproject.h
+++ b/src/common/inc/swref/published/hopper/gh100/hwproject.h
@ -21,9 +21,6 @@
 * DEALINGS IN THE SOFTWARE.
 */
 #define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS              52
-#define NV_LTC_PRI_STRIDE                            8192
-#define NV_LTS_PRI_STRIDE                             512
-#define NV_FBPA_PRI_STRIDE                      16384
-#define NV_SCAL_LITTER_NUM_FBPAS                       24
 #define NV_XPL_BASE_ADDRESS                    540672
 #define NV_XTL_BASE_ADDRESS                    593920
+#define NV_FBPA_PRI_STRIDE                      16384
--- a/src/common/inc/swref/published/nvswitch/ls10/ptop_discovery_ip.h
+++ b/src/common/inc/swref/published/nvswitch/ls10/ptop_discovery_ip.h
@ -0,0 +1,28 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __ls10_ptop_discovery_ip_h__
+#define __ls10_ptop_discovery_ip_h__
+/* This file is autogenerated.  Do not edit */
+#define  NV_PTOP_UNICAST_SW_DEVICE_BASE_SAW_0                                0x00028000                    /*       */
+#endif // __ls10_ptop_discovery_ip_h__
--- a/src/common/inc/swref/published/turing/tu102/dev_fb.h
+++ b/src/common/inc/swref/published/turing/tu102/dev_fb.h
@ -38,4 +38,25 @@
 #define NV_PFB_PRI_MMU_WPR2_ADDR_HI_VAL                                              31:4 /* RWEVF */
 #define NV_PFB_PRI_MMU_WPR2_ADDR_HI_ALIGNMENT                                  0x0000000c /*       */

+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E78 /* RW-4R */
+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E78 /* RW-4R */
+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
+#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
+
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E8C /* RW-4R */
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT               0x00100E8C /* RW-4R */
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
+#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
+
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT               0x00100EA0 /* RW-4R */
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT               0x00100EA0 /* RW-4R */
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL               15:0 /* RWEVF */
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT             0 /* RWE-V */
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE             31:16 /* RWEVF */
+#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT            0 /* RWE-V */
+
 #endif // __tu102_dev_fb_h__
--- a/src/common/inc/swref/published/turing/tu102/dev_fbpa.h
+++ b/src/common/inc/swref/published/turing/tu102/dev_fbpa.h
@ -0,0 +1,29 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __tu102_dev_fbpa_h_
+#define __tu102_dev_fbpa_h_
+
+#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1               2 /*       */
+#define NV_PFB_FBPA_0_ECC_DED_COUNT(i)                   (0x00900488+(i)*4) /* RW-4A */
+#endif // __tu102_dev_fbpa_h_
--- a/src/common/inc/swref/published/turing/tu102/dev_gsp.h
+++ b/src/common/inc/swref/published/turing/tu102/dev_gsp.h
@ -38,5 +38,22 @@
 #define NV_PGSP_QUEUE_HEAD(i)                                                                            (0x110c00+(i)*8) /* RW-4A */
 #define NV_PGSP_QUEUE_HEAD__SIZE_1                                                                       8              /*       */
 #define NV_PGSP_QUEUE_HEAD_ADDRESS                                                                       31:0           /* RWIVF */
+#define NV_PGSP_EMEMC(i)                                                                                 (0x110ac0+(i)*8) /* RW-4A */
+#define NV_PGSP_EMEMC__SIZE_1                                                                            4              /*       */
+#define NV_PGSP_EMEMC_OFFS                                                                               7:2            /* RWIVF */
+#define NV_PGSP_EMEMC_OFFS_INIT                                                                          0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_BLK                                                                                15:8           /* RWIVF */
+#define NV_PGSP_EMEMC_BLK_INIT                                                                           0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCW                                                                              24:24          /* RWIVF */
+#define NV_PGSP_EMEMC_AINCW_INIT                                                                         0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCW_TRUE                                                                         0x00000001     /* RW--V */
+#define NV_PGSP_EMEMC_AINCW_FALSE                                                                        0x00000000     /* RW--V */
+#define NV_PGSP_EMEMC_AINCR                                                                              25:25          /* RWIVF */
+#define NV_PGSP_EMEMC_AINCR_INIT                                                                         0x00000000     /* RWI-V */
+#define NV_PGSP_EMEMC_AINCR_TRUE                                                                         0x00000001     /* RW--V */
+#define NV_PGSP_EMEMC_AINCR_FALSE                                                                        0x00000000     /* RW--V */
+#define NV_PGSP_EMEMD(i)                                                                                 (0x110ac4+(i)*8) /* RW-4A */
+#define NV_PGSP_EMEMD__SIZE_1                                                                            4              /*       */
+#define NV_PGSP_EMEMD_DATA                                                                               31:0           /* RW-VF */

 #endif // __tu102_dev_gsp_h__
--- a/src/common/inc/swref/published/turing/tu102/dev_ltc.h
+++ b/src/common/inc/swref/published/turing/tu102/dev_ltc.h
@ -21,8 +21,8 @@
 * DEALINGS IN THE SOFTWARE.
 */

-#ifndef __gh100_dev_ltc_h_
-#define __gh100_dev_ltc_h_
+#ifndef __tu102_dev_ltc_h_
+#define __tu102_dev_ltc_h_

 #define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT                  0x001404f8 /* RW-4R */
 #define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL                  15:0 /* RWIVF */
@ -30,4 +30,4 @@
 #define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE                31:16 /* RWIVF */
 #define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT          0x0000 /* RWI-V */

-#endif // __gh100_dev_ltc_h_
+#endif // __tu102_dev_ltc_h_
--- a/src/common/inc/swref/published/turing/tu102/hwproject.h
+++ b/src/common/inc/swref/published/turing/tu102/hwproject.h
@ -25,5 +25,9 @@
 #define __tu102_hwproject_h__

 #define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS              47
+#define NV_SCAL_LITTER_NUM_FBPAS                       16
+#define NV_FBPA_PRI_STRIDE                      16384
+#define NV_LTC_PRI_STRIDE                            8192
+#define NV_LTS_PRI_STRIDE                             512

 #endif // __tu102_hwproject_h__
--- a/src/common/nvswitch/common/inc/soe/soeififr.h
+++ b/src/common/nvswitch/common/inc/soe/soeififr.h
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -37,6 +37,17 @@ enum
    RM_SOE_IFR_BBX_SHUTDOWN,
    RM_SOE_IFR_BBX_SXID_ADD,
    RM_SOE_IFR_BBX_SXID_GET,
+    RM_SOE_IFR_BBX_DATA_GET,
+};
+
+enum
+{
+    RM_SOE_IFR_BBX_GET_NONE,
+    RM_SOE_IFR_BBX_GET_SXID,
+    RM_SOE_IFR_BBX_GET_SYS_INFO,
+    RM_SOE_IFR_BBX_GET_TIME_INFO,
+    RM_SOE_IFR_BBX_GET_TEMP_DATA,
+    RM_SOE_IFR_BBX_GET_TEMP_SAMPLES,
 };

 typedef struct
@ -75,6 +86,14 @@ typedef struct
    RM_FLCN_U64 dmaHandle;
 } RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS;

+typedef struct
+{
+    NvU8 cmdType;
+    NvU32 sizeInBytes;
+    RM_FLCN_U64 dmaHandle;
+    NvU8 dataType;
+} RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS;
+
 typedef union
 {
 	NvU8	cmdType;
@ -82,6 +101,7 @@ typedef union
    RM_SOE_IFR_CMD_BBX_INIT_PARAMS bbxInit;
    RM_SOE_IFR_CMD_BBX_SXID_ADD_PARAMS bbxSxidAdd;
    RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS bbxSxidGet;
+    RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS bbxDataGet;
 } RM_SOE_IFR_CMD;

 // entry of getSxid
@ -99,4 +119,81 @@ typedef struct
    RM_SOE_BBX_SXID_ENTRY sxidLast[INFOROM_BBX_OBJ_XID_ENTRIES];
 } RM_SOE_BBX_GET_SXID_DATA;

+// NVSwitch system version information returning with the command GET_SYS_INFO
+typedef struct
+{
+    NvU32 driverLo;             //Driver Version Low 32 bits
+    NvU16 driverHi;             //Driver Version High 16 bits
+    NvU32 vbiosVersion;         //VBIOS Version 
+    NvU8 vbiosVersionOem;       //VBIOS OEM Version byte
+    NvU8  osType;               //OS Type (UNIX/WIN/WIN2K/WIN9x/OTHER)
+    NvU32 osVersion;            //OS Version (Build|MINOR|MAJOR)
+} RM_SOE_BBX_GET_SYS_INFO_DATA;
+
+// NVSwitch time information returning with the command GET_TIME_INFO
+typedef struct
+{
+    NvU32 timeStart;            //Timestamp (EPOCH) when the driver was loaded on the GPU for the first time
+    NvU32 timeEnd;              //Timestamp (EPOCH) when the data was last flushed
+    NvU32 timeRun;              //Amount of time (in seconds) driver was loaded, and GPU has run
+    NvU32 time24Hours;          //Timestamp (EPOCH) of when the first 24 operational hours is hit
+    NvU32 time100Hours;         //Timestamp (EPOCH) of when the first 100 operational hours is hit
+} RM_SOE_BBX_GET_TIME_INFO_DATA;
+
+#define RM_SOE_BBX_TEMP_DAY_ENTRIES               5
+#define RM_SOE_BBX_TEMP_WEEK_ENTRIES              5
+#define RM_SOE_BBX_TEMP_MNT_ENTRIES               5
+#define RM_SOE_BBX_TEMP_ALL_ENTRIES               5
+#define RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES          23
+#define RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES           5
+#define RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES           3
+#define RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES    20
+#define RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES    21
+#define RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES        168
+#define RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES   1096
+#define RM_SOE_BBX_NUM_COMPRESSION_PERIODS        8
+
+// NVSwitch Temperature Entry
+typedef struct
+{
+    NvU16 value;                //Temperature (SFXP 9.7 format in Celsius)
+    NvU32 timestamp;            //Timestamp (EPOCH) of when the entry is recorded
+} RM_SOE_BBX_TEMP_ENTRY;
+
+// NVSwitch Temperature Data returning with the command GET_TEMP_DATA
+typedef struct
+{
+    NvU32                 tempMaxDayIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMaxDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
+    NvU32                 tempMaxWeekIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMaxWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
+    NvU32                 tempMaxMntIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMaxMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
+    NvU32                 tempMaxAllIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMaxAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
+    NvU32                 tempMinDayIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMinDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
+    NvU32                 tempMinWeekIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMinWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
+    NvU32                 tempMinMntIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMinMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
+    NvU32                 tempMinAllIdx;
+    RM_SOE_BBX_TEMP_ENTRY tempMinAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
+    NvU32                 tempSumDelta;
+    NvU32                 tempSumHour[RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES];
+    NvU32                 tempSumDay[RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES];
+    NvU32                 tempSumMnt[RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES];
+    NvU32                 tempHistogramThld[RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES];
+    NvU32                 tempHistogramTime[RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES];
+    RM_SOE_BBX_TEMP_ENTRY tempHourlyMaxSample[RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES];
+} RM_SOE_BBX_GET_TEMP_DATA;
+
+// NVSwitch Temperature Compressed Samples returning with the command GET_TEMP_SAMPLES
+typedef struct
+{
+    NvU32                 compressionPeriodIdx;
+    NvU32                 compressionPeriod[RM_SOE_BBX_NUM_COMPRESSION_PERIODS];
+    RM_SOE_BBX_TEMP_ENTRY tempCompressionBuffer[RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES];
+} RM_SOE_BBX_GET_TEMP_SAMPLES;
+
 #endif // _SOEIFIFR_H_
--- a/src/common/nvswitch/interface/ctrl_dev_nvswitch.h
+++ b/src/common/nvswitch/interface/ctrl_dev_nvswitch.h
@ -830,6 +830,7 @@ typedef enum nvswitch_err_type
    NVSWITCH_ERR_HW_HOST_THERMAL_SHUTDOWN                              = 10006,
    NVSWITCH_ERR_HW_HOST_IO_FAILURE                                    = 10007,
    NVSWITCH_ERR_HW_HOST_FIRMWARE_INITIALIZATION_FAILURE               = 10008,
+    NVSWITCH_ERR_HW_HOST_FIRMWARE_RECOVERY_MODE                        = 10009,
    NVSWITCH_ERR_HW_HOST_LAST,


@ -2973,6 +2974,197 @@ typedef struct
    NVSWITCH_SXID_ENTRY sxidLast[NVSWITCH_SXID_ENTRIES_NUM];
 } NVSWITCH_GET_SXIDS_PARAMS;

+/*
+ * CTRL_NVSWITCH_GET_SYS_INFO
+ *
+ * Control to get the NVSwitch system version information from inforom cache 
+ *
+ * Parameters:
+ *    driverLo [OUT]
+ *      The driver version low 32 bits. Example: driverLo = 54531 (Driver 545.31)
+ *    driverHi [OUT]
+ *      The driver version high 16 bits
+ *    vbiosVersion [OUT]
+ *      The vbios version number. Example: vbiosVersion=0x96104100 (release 96.10.41.00)
+ *    vbiosVersionOem [OUT]
+ *      The vbios OEM version byte.
+ *    osType [OUT]
+ *      The OS type. Example:  osType=0x05 (UNIX)
+ *    osVersion [OUT]
+ *      The OS version number. [BUILD[31:16]|MINOR[15:8]|MAJOR[7:0]]
+ */
+
+typedef struct
+{
+    NvU32 driverLo;
+    NvU16 driverHi;
+    NvU32 vbiosVersion;
+    NvU8  vbiosVersionOem;
+    NvU8  osType;
+    NvU32 osVersion;
+} NVSWITCH_GET_SYS_INFO_PARAMS;
+
+/*
+ * CTRL_NVSWITCH_GET_TIME_INFO
+ *
+ * Control to get the NVSwitch time information from inforom cache 
+ *
+ * Parameters:
+ *    timeStart [OUT]
+ *      The timestamp (EPOCH) when driver load onto the NVSwitch for the 1st time
+ *    timeEnd [OUT]
+ *      The timestamp (EPOCH) when the data was last flushed
+ *    timeRun [OUT]
+ *      The amount of time (in seconds) driver was loaded/running
+ *    time24Hours [OUT]
+ *      The timestamp (EPOCH) when the first 24 operational hours is hit
+ *    time100Hours [OUT]
+ *      The timestamp (EPOCH) when the first 100 operational hours is hit
+ */
+
+typedef struct
+{
+    NvU32 timeStart;
+    NvU32 timeEnd;
+    NvU32 timeRun;
+    NvU32 time24Hours;
+    NvU32 time100Hours;
+} NVSWITCH_GET_TIME_INFO_PARAMS;
+
+#define NVSWITCH_TEMP_DAY_ENTRIES               5
+#define NVSWITCH_TEMP_WEEK_ENTRIES              5
+#define NVSWITCH_TEMP_MNT_ENTRIES               5
+#define NVSWITCH_TEMP_ALL_ENTRIES               5
+#define NVSWITCH_TEMP_SUM_HOUR_ENTRIES          23
+#define NVSWITCH_TEMP_SUM_DAY_ENTRIES           5
+#define NVSWITCH_TEMP_SUM_MNT_ENTRIES           3
+#define NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES    20
+#define NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES    21
+#define NVSWITCH_TEMP_HOURLY_MAX_ENTRIES        168
+
+/*
+ * NVSWITCH_TEMP_ENTRY
+ *
+ * This structure represents the NVSwitch TEMP with its timestamp.
+ *
+ *   value
+ *     This parameter specifies the NVSwitch Temperature
+ *     (SFXP 9.7 format in Celsius).
+ *
+ *   timestamp
+ *     This parameter specifies the timestamp (EPOCH) of the entry.
+ */
+typedef struct
+{
+    NvU16 value;
+    NvU32 timestamp;
+} NVSWITCH_TEMP_ENTRY;
+
+/*
+ * CTRL_NVSWITCH_GET_TEMP_DATA
+ *
+ * Control to get the NVSwitch device historical temperature information from inforom cache 
+ *
+ * Parameters:
+ *    tempMaxDayIdx [OUT]
+ *      The current index to the maximum day temperature array
+ *    tempMaxDay[] [OUT]
+ *      The maximum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
+ *    tempMaxWeekIdx [OUT]
+ *      The current index to the maximum week temperature array
+ *    tempMaxWeek[] [OUT]
+ *      The maximum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
+ *    tempMaxMntIdx [OUT]
+ *      The current index to the maximum month temperature array
+ *    tempMaxMnt[] [OUT]
+ *      The maximum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
+ *    tempMaxAllIdx [OUT]
+ *      The current index to the maximum temperature array
+ *    tempMaxAll[] [OUT]
+ *      The maximum temperature array for the device 
+ *    tempMinDayIdx [OUT]
+ *      The current index to the minimum day temperature array
+ *    tempMinDay[] [OUT]
+ *      The minimum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
+ *    tempMinWeekIdx [OUT]
+ *      The current index to the minimum week temperature array
+ *    tempMinWeek[] [OUT]
+ *      The minimum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
+ *    tempMinMntIdx [OUT]
+ *      The current index to the minimum month temperature array
+ *    tempMinMnt[] [OUT]
+ *      The minimum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
+ *    tempMinAllIdx [OUT]
+ *      The current index to the minimum temperature array
+ *    tempMinAll[] [OUT]
+ *      The minimum temperature array for the device
+ *    tempSumDelta [OUT]
+ *      The total sum of temperature change in 0.1C granularity
+ *    tempSumHour[] [OUT]
+ *      The moving average of temperature per hour, for last NVSWITCH_TEMP_SUM_HOUR_ENTRIES hours
+ *    tempSumDay[] [OUT]
+ *      The moving average of temperature per day, for last NVSWITCH_TEMP_SUM_DAY_ENTRIES days
+ *    tempSumMnt[] [OUT]
+ *      The moving average of temperature per month, for last NVSWITCH_TEMP_SUM_MNT_ENTRIES months
+ *    tempHistogramThld[] [OUT]
+ *      The histogram of temperature crossing various thresholds (5/10/15/.../95/100)
+ *    tempHistogramTime[] [OUT]
+ *      The histogram of time was in various temperature ranges (0..5/5..10/.../100..)
+ *    tempHourlyMaxSample[] [OUT]
+ *      The maximum hourly temperature array for the device
+ */
+
+typedef struct
+{
+    NvU32               tempMaxDayIdx;
+    NVSWITCH_TEMP_ENTRY tempMaxDay[NVSWITCH_TEMP_DAY_ENTRIES];
+    NvU32               tempMaxWeekIdx;
+    NVSWITCH_TEMP_ENTRY tempMaxWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
+    NvU32               tempMaxMntIdx;
+    NVSWITCH_TEMP_ENTRY tempMaxMnt[NVSWITCH_TEMP_MNT_ENTRIES];
+    NvU32               tempMaxAllIdx;
+    NVSWITCH_TEMP_ENTRY tempMaxAll[NVSWITCH_TEMP_ALL_ENTRIES];
+    NvU32               tempMinDayIdx;
+    NVSWITCH_TEMP_ENTRY tempMinDay[NVSWITCH_TEMP_DAY_ENTRIES];
+    NvU32               tempMinWeekIdx;
+    NVSWITCH_TEMP_ENTRY tempMinWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
+    NvU32               tempMinMntIdx;
+    NVSWITCH_TEMP_ENTRY tempMinMnt[NVSWITCH_TEMP_MNT_ENTRIES];
+    NvU32               tempMinAllIdx;
+    NVSWITCH_TEMP_ENTRY tempMinAll[NVSWITCH_TEMP_ALL_ENTRIES];
+    NvU32               tempSumDelta;
+    NvU32               tempSumHour[NVSWITCH_TEMP_SUM_HOUR_ENTRIES];
+    NvU32               tempSumDay[NVSWITCH_TEMP_SUM_DAY_ENTRIES];
+    NvU32               tempSumMnt[NVSWITCH_TEMP_SUM_MNT_ENTRIES];
+    NvU32               tempHistogramThld[NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES];
+    NvU32               tempHistogramTime[NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES];
+    NVSWITCH_TEMP_ENTRY tempHourlyMaxSample[NVSWITCH_TEMP_HOURLY_MAX_ENTRIES];
+} NVSWITCH_GET_TEMP_DATA_PARAMS;
+
+#define NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES   1096
+#define NVSWITCH_NUM_COMPRESSION_PERIODS        8
+
+/*
+ * CTRL_NVSWITCH_GET_TEMP_DATA
+ *
+ * Control to get the NVSwitch device temperature information from inforom cache 
+ *
+ * Parameters:
+ *    compressionPeriodIdx [OUT]
+ *      The current index to the sample period array
+ *    compressionPeriod[] [OUT]
+ *      The samples period array (seconds)
+ *    tempCompressionBuffer[] [OUT]
+ *      The temperature array sampling at a specific period in compressionPeriod[]
+ */
+
+typedef struct
+{
+    NvU32               compressionPeriodIdx;
+    NvU32               compressionPeriod[NVSWITCH_NUM_COMPRESSION_PERIODS];
+    NVSWITCH_TEMP_ENTRY tempCompressionBuffer[NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES];
+} NVSWITCH_GET_TEMP_SAMPLES_PARAMS;
+
 /*
 * CTRL_NVSWITCH_GET_FOM_VALUES
 *   This command gives the FOM values to MODS
@ -3848,6 +4040,10 @@ typedef struct
 #define CTRL_NVSWITCH_RESERVED_11                           0x55
 #define CTRL_NVSWITCH_GET_BOARD_PART_NUMBER                 0x56
 #define CTRL_NVSWITCH_GET_POWER                             0x57
+#define CTRL_NVSWITCH_GET_SYS_INFO                          0x58
+#define CTRL_NVSWITCH_GET_TIME_INFO                         0x59
+#define CTRL_NVSWITCH_GET_TEMP_DATA                         0x60
+#define CTRL_NVSWITCH_GET_TEMP_SAMPLES                      0x61

 #ifdef __cplusplus
 }
--- a/src/common/nvswitch/kernel/inc/haldef_nvswitch.h
+++ b/src/common/nvswitch/kernel/inc/haldef_nvswitch.h
@ -158,6 +158,7 @@
    _op(NvlStatus, nvswitch_bbx_unload, (nvswitch_device *device), _arch)  \
    _op(NvlStatus, nvswitch_bbx_load, (nvswitch_device *device, NvU64 time_ns, NvU8 osType, NvU32 osVersion), _arch)  \
    _op(NvlStatus, nvswitch_bbx_get_sxid, (nvswitch_device *device, NVSWITCH_GET_SXIDS_PARAMS * params), _arch)  \
+    _op(NvlStatus, nvswitch_bbx_get_data, (nvswitch_device *device, NvU8 dataType, void * params), _arch)  \
    _op(NvlStatus, nvswitch_smbpbi_alloc,           (nvswitch_device *device), _arch)  \
    _op(NvlStatus, nvswitch_smbpbi_post_init_hal,   (nvswitch_device *device), _arch)  \
    _op(void,      nvswitch_smbpbi_destroy_hal,     (nvswitch_device *device), _arch)  \
@ -235,6 +236,7 @@
    _op(NvlStatus, nvswitch_ctrl_therm_read_power, (nvswitch_device *device, NVSWITCH_GET_POWER_PARAMS *info), _arch) \
    _op(NvBool,    nvswitch_does_link_need_termination_enabled, (nvswitch_device *device, nvlink_link *link), _arch) \
    _op(NvlStatus, nvswitch_link_termination_setup, (nvswitch_device *device, nvlink_link *link), _arch) \
+    _op(NvlStatus, nvswitch_check_io_sanity, (nvswitch_device *device), _arch) \

 #define NVSWITCH_HAL_FUNCTION_LIST_LS10(_op, _arch) \
    _op(NvlStatus, nvswitch_launch_ALI, (nvswitch_device *device), _arch) \
--- a/src/common/nvswitch/kernel/inc/inforom/inforom_nvswitch.h
+++ b/src/common/nvswitch/kernel/inc/inforom/inforom_nvswitch.h
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -184,6 +184,7 @@ NvlStatus nvswitch_inforom_bbx_add_sxid(nvswitch_device *device,
                                    NvU32 data1, NvU32 data2);
 NvlStatus nvswitch_inforom_bbx_get_sxid(nvswitch_device *device,
                            NVSWITCH_GET_SXIDS_PARAMS *params);
+NvlStatus nvswitch_inforom_bbx_get_data(nvswitch_device *device, NvU8 dataType, void *params);

 // InfoROM DEM APIs
 NvlStatus nvswitch_inforom_dem_load(nvswitch_device *device);
--- a/src/common/nvswitch/kernel/inc/lr10/inforom_lr10.h
+++ b/src/common/nvswitch/kernel/inc/lr10/inforom_lr10.h
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -169,4 +169,12 @@ nvswitch_bbx_get_sxid_lr10
    NVSWITCH_GET_SXIDS_PARAMS * params
 );

+NvlStatus
+nvswitch_bbx_get_data_lr10
+(
+    nvswitch_device *device,
+    NvU8 dataType,
+    void *params
+);
+
 #endif //_INFOROM_LR10_H_
--- a/src/common/nvswitch/kernel/inc/ls10/inforom_ls10.h
+++ b/src/common/nvswitch/kernel/inc/ls10/inforom_ls10.h
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -154,4 +154,11 @@ nvswitch_bbx_get_sxid_ls10
    NVSWITCH_GET_SXIDS_PARAMS * params
 );

+NvlStatus
+nvswitch_bbx_get_data_ls10
+(
+    nvswitch_device *device,
+    NvU8 dataType,
+    void *params
+);
 #endif //_INFOROM_LS10_H_
--- a/src/common/nvswitch/kernel/inc/soe/bin/g_soeuc_lr10_dbg.h
+++ b/src/common/nvswitch/kernel/inc/soe/bin/g_soeuc_lr10_dbg.h
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
   0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03, 
   0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf, 
   0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801, 
-   0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00, 
+   0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00, 
   0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111, 
   0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d, 
   0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28, 
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
   0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894, 
   0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999, 
   0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff, 
-   0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c, 
+   0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c, 
   0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c, 
   0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0, 
   0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00, 
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-   0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa, 
-   0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9, 
+   0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7, 
+   0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
--- a/src/common/nvswitch/kernel/inc/soe/bin/g_soeuc_lr10_prd.h
+++ b/src/common/nvswitch/kernel/inc/soe/bin/g_soeuc_lr10_prd.h
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
   0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03, 
   0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf, 
   0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801, 
-   0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00, 
+   0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00, 
   0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111, 
   0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d, 
   0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28, 
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
   0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894, 
   0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999, 
   0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff, 
-   0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c, 
+   0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c, 
   0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c, 
   0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0, 
   0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00, 
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-   0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa, 
-   0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9, 
+   0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7, 
+   0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
--- a/src/common/nvswitch/kernel/inforom/ifrbbx_nvswitch.c
+++ b/src/common/nvswitch/kernel/inforom/ifrbbx_nvswitch.c
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -130,3 +130,21 @@ nvswitch_inforom_bbx_get_sxid
    return status;
 }

+NvlStatus
+nvswitch_inforom_bbx_get_data
+(
+    nvswitch_device *device,
+    NvU8 dataType,
+    void *params
+)
+{
+    NvlStatus status;
+
+    status = device->hal.nvswitch_bbx_get_data(device, dataType, params);
+    if (status != NVL_SUCCESS)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: (type=%d) failed, status=%d\n", __FUNCTION__, dataType, status);
+    }
+
+    return status;
+}
--- a/src/common/nvswitch/kernel/lr10/inforom_lr10.c
+++ b/src/common/nvswitch/kernel/lr10/inforom_lr10.c
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -32,6 +32,7 @@
 #include "nvVer.h"
 #include "regkey_nvswitch.h"
 #include "inforom/inforom_nvl_v3_nvswitch.h"
+#include "soe/soeififr.h"

 //
 // TODO: Split individual object hals to their own respective files
@ -1280,3 +1281,14 @@ nvswitch_bbx_get_sxid_lr10
    return -NVL_ERR_NOT_SUPPORTED;
 }

+NvlStatus
+nvswitch_bbx_get_data_lr10
+(
+    nvswitch_device *device,
+    NvU8 dataType,
+    void *params
+)
+{
+    return -NVL_ERR_NOT_SUPPORTED;
+}
+
--- a/src/common/nvswitch/kernel/lr10/link_lr10.c
+++ b/src/common/nvswitch/kernel/lr10/link_lr10.c
@ -566,6 +566,11 @@ nvswitch_init_lpwr_regs_lr10
    NvU8  softwareDesired, hardwareDisable;
    NvBool bLpEnable;

+    if (nvswitch_is_link_in_reset(device, link))
+    {
+        return;
+    }
+
    if (device->regkeys.enable_pm == NV_SWITCH_REGKEY_ENABLE_PM_NO)
    {
        return;
--- a/src/common/nvswitch/kernel/lr10/lr10.c
+++ b/src/common/nvswitch/kernel/lr10/lr10.c
@ -3656,6 +3656,15 @@ nvswitch_initialize_device_state_lr10
        goto nvswitch_initialize_device_state_exit;
    }

+    retval = nvswitch_check_io_sanity(device);
+    if (NVL_SUCCESS != retval)
+    {
+        NVSWITCH_PRINT(device, ERROR,
+            "%s: IO sanity test failed\n",
+            __FUNCTION__);
+        goto nvswitch_initialize_device_state_exit;
+    }
+
    NVSWITCH_PRINT(device, SETUP,
        "%s: MMIO discovery\n",
        __FUNCTION__);
@ -7850,6 +7859,15 @@ nvswitch_ctrl_get_nvlink_error_threshold_lr10
    return -NVL_ERR_NOT_SUPPORTED;
 }

+NvlStatus
+nvswitch_check_io_sanity_lr10
+(
+    nvswitch_device *device
+)
+{
+    return NVL_SUCCESS;
+}
+
 //
 // This function auto creates the lr10 HAL connectivity from the NVSWITCH_INIT_HAL
 // macro in haldef_nvswitch.h
--- a/src/common/nvswitch/kernel/ls10/inforom_ls10.c
+++ b/src/common/nvswitch/kernel/ls10/inforom_ls10.c
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -863,3 +863,174 @@ nvswitch_bbx_get_sxid_ls10_free_and_exit:

    return status;
 }
+
+NvlStatus
+nvswitch_bbx_get_data_ls10
+(
+    nvswitch_device *device,
+    NvU8 dataType,
+    void *params
+)
+{
+    NvlStatus status;
+    void *pDmaBuf;
+    NvU64 dmaHandle;
+    FLCN *pFlcn;
+    RM_FLCN_CMD_SOE bbxCmd;
+    NvU32 cmdSeqDesc;
+    NVSWITCH_TIMEOUT timeout;
+    NvU32 transferSize;
+
+    if (!nvswitch_is_inforom_supported_ls10(device))
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: InfoROM is not supported\n", __FUNCTION__);
+        return -NVL_ERR_NOT_SUPPORTED;
+    }
+
+    if (params == NULL)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: params is NULL\n", __FUNCTION__);
+        return -NVL_BAD_ARGS;
+    }
+
+    switch (dataType)
+    {
+        case RM_SOE_IFR_BBX_GET_SYS_INFO:
+            transferSize = sizeof(NVSWITCH_GET_SYS_INFO_PARAMS);
+        break;
+
+        case RM_SOE_IFR_BBX_GET_TIME_INFO:
+            transferSize = sizeof(NVSWITCH_GET_TIME_INFO_PARAMS);
+        break;
+
+        case RM_SOE_IFR_BBX_GET_TEMP_DATA:
+            transferSize = sizeof(NVSWITCH_GET_TEMP_DATA_PARAMS);
+        break;
+
+        case RM_SOE_IFR_BBX_GET_TEMP_SAMPLES:
+            transferSize = sizeof(NVSWITCH_GET_TEMP_SAMPLES_PARAMS);
+        break;
+        default:
+            NVSWITCH_PRINT(device, ERROR, "Unknown dataType %d", dataType);
+            return -NVL_BAD_ARGS;
+        break;
+    }
+
+    status = nvswitch_os_alloc_contig_memory(device->os_handle, &pDmaBuf, transferSize,
+                                            (device->dma_addr_width == 32));
+    if (status != NVL_SUCCESS)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: Failed to allocate contig memory. rc:%d\n", __FUNCTION__, status);
+        return status;
+    }
+
+    status = nvswitch_os_map_dma_region(device->os_handle, pDmaBuf, &dmaHandle,
+                                        transferSize, NVSWITCH_DMA_DIR_TO_SYSMEM);
+    if (status != NVL_SUCCESS)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: Failed to map DMA region. rc:%d\n", __FUNCTION__, status);
+        goto nvswitch_bbx_get_data_ls10_free_and_exit;
+    }
+
+    nvswitch_os_memset(pDmaBuf, 0, transferSize);
+
+    pFlcn = device->pSoe->pFlcn;
+    nvswitch_timeout_create(NVSWITCH_INTERVAL_5MSEC_IN_NS, &timeout);
+
+    nvswitch_os_memset(&bbxCmd, 0, sizeof(bbxCmd));
+    bbxCmd.hdr.unitId = RM_SOE_UNIT_IFR;
+    bbxCmd.hdr.size = sizeof(bbxCmd);
+    bbxCmd.cmd.ifr.cmdType = RM_SOE_IFR_BBX_DATA_GET;
+    bbxCmd.cmd.ifr.bbxDataGet.sizeInBytes = transferSize;
+    bbxCmd.cmd.ifr.bbxDataGet.dataType = dataType;
+    RM_FLCN_U64_PACK(&bbxCmd.cmd.ifr.bbxDataGet.dmaHandle, &dmaHandle);
+
+    status = flcnQueueCmdPostBlocking(device, pFlcn,
+                                (PRM_FLCN_CMD)&bbxCmd,
+                                NULL,   // pMsg
+                                NULL,   // pPayload
+                                SOE_RM_CMDQ_LOG_ID,
+                                &cmdSeqDesc,
+                                &timeout);
+    if (status != NV_OK)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: BX_GET_DATA type=%d failed. rc:%d\n",
+                        __FUNCTION__, dataType, status);
+        goto nvswitch_bbx_get_data_ls10_unmap_and_exit;
+    }
+
+    status = nvswitch_os_sync_dma_region_for_cpu(device->os_handle, dmaHandle,
+                                                        transferSize,
+                                                        NVSWITCH_DMA_DIR_TO_SYSMEM);
+    if (status != NV_OK)
+    {
+        NVSWITCH_PRINT(device, ERROR, "%s: Failed to sync DMA region. rc:%d\n", __FUNCTION__, status);
+        goto nvswitch_bbx_get_data_ls10_unmap_and_exit;
+    }
+
+    if (dataType == RM_SOE_IFR_BBX_GET_SYS_INFO)
+    {
+        NVSWITCH_GET_SYS_INFO_PARAMS bbxSysInfoData = {0};
+
+        nvswitch_os_memcpy((NvU8 *)&bbxSysInfoData, (NvU8 *)pDmaBuf, sizeof(NVSWITCH_GET_SYS_INFO_PARAMS));
+        nvswitch_os_memcpy((NvU8 *)params, (NvU8 *)&bbxSysInfoData, sizeof(NVSWITCH_GET_SYS_INFO_PARAMS));
+    }
+    else if (dataType == RM_SOE_IFR_BBX_GET_TIME_INFO)
+    {
+        NVSWITCH_GET_TIME_INFO_PARAMS bbxTimeInfoData = {0};
+
+        nvswitch_os_memcpy((NvU8 *)&bbxTimeInfoData, (NvU8 *)pDmaBuf, sizeof(NVSWITCH_GET_TIME_INFO_PARAMS));
+        nvswitch_os_memcpy((NvU8 *)params, (NvU8 *)&bbxTimeInfoData, sizeof(NVSWITCH_GET_TIME_INFO_PARAMS));
+    }
+    else if (dataType == RM_SOE_IFR_BBX_GET_TEMP_DATA)
+    {
+        NVSWITCH_GET_TEMP_DATA_PARAMS *pBbxTempData = NULL;
+
+        pBbxTempData = nvswitch_os_malloc(sizeof(NVSWITCH_GET_TEMP_DATA_PARAMS));
+        if (pBbxTempData == NULL)
+        {
+            NVSWITCH_PRINT(device, ERROR, "Out of memory: dataType %d", dataType);
+            status = -NVL_NO_MEM;
+            goto nvswitch_bbx_get_data_ls10_unmap_and_exit;
+        }
+        
+        nvswitch_os_memset(pBbxTempData, 0, sizeof(NVSWITCH_GET_TEMP_DATA_PARAMS));
+
+        nvswitch_os_memcpy((NvU8 *)pBbxTempData, (NvU8 *)pDmaBuf, sizeof(NVSWITCH_GET_TEMP_DATA_PARAMS));
+        nvswitch_os_memcpy((NvU8 *)params, (NvU8 *)pBbxTempData, sizeof(NVSWITCH_GET_TEMP_DATA_PARAMS));
+
+        nvswitch_os_free(pBbxTempData);
+    }
+    else if (dataType == RM_SOE_IFR_BBX_GET_TEMP_SAMPLES)
+    {
+        NVSWITCH_GET_TEMP_SAMPLES_PARAMS *pBbxTempSamples = NULL;
+
+        pBbxTempSamples = nvswitch_os_malloc(sizeof(NVSWITCH_GET_TEMP_SAMPLES_PARAMS));
+        if (pBbxTempSamples == NULL)
+        {
+            NVSWITCH_PRINT(device, ERROR, "Out of memory: dataType %d", dataType);
+            status = -NVL_NO_MEM;
+            goto nvswitch_bbx_get_data_ls10_unmap_and_exit;
+        }
+        
+        nvswitch_os_memset(pBbxTempSamples, 0, sizeof(NVSWITCH_GET_TEMP_SAMPLES_PARAMS));
+
+        nvswitch_os_memcpy((NvU8 *)pBbxTempSamples, (NvU8 *)pDmaBuf, sizeof(NVSWITCH_GET_TEMP_SAMPLES_PARAMS));
+        nvswitch_os_memcpy((NvU8 *)params, (NvU8 *)pBbxTempSamples, sizeof(NVSWITCH_GET_TEMP_SAMPLES_PARAMS));
+
+        nvswitch_os_free(pBbxTempSamples);
+    }
+    else
+    {
+        NVSWITCH_PRINT(device, ERROR, "Unknown dataType %d", dataType);
+        goto nvswitch_bbx_get_data_ls10_unmap_and_exit;
+    }
+
+nvswitch_bbx_get_data_ls10_unmap_and_exit:
+    nvswitch_os_unmap_dma_region(device->os_handle, pDmaBuf, dmaHandle,
+                                        transferSize, NVSWITCH_DMA_DIR_FROM_SYSMEM);
+nvswitch_bbx_get_data_ls10_free_and_exit:
+    nvswitch_os_free_contig_memory(device->os_handle, pDmaBuf, transferSize);
+
+    return status;
+}
--- a/src/common/nvswitch/kernel/ls10/ls10.c
+++ b/src/common/nvswitch/kernel/ls10/ls10.c
@ -42,6 +42,7 @@
 #include "ls10/gfw_ls10.h"

 #include "nvswitch/ls10/dev_nvs_top.h"
+#include "nvswitch/ls10/ptop_discovery_ip.h"
 #include "nvswitch/ls10/dev_pri_masterstation_ip.h"
 #include "nvswitch/ls10/dev_pri_hub_sys_ip.h"
 #include "nvswitch/ls10/dev_nvlw_ip.h"
@ -5839,6 +5840,104 @@ nvswitch_ctrl_get_nvlink_error_threshold_ls10
    return NVL_SUCCESS;
 }

+NvlStatus
+nvswitch_check_io_sanity_ls10
+(
+    nvswitch_device *device
+)
+{
+    NvBool keepPolling;
+    NVSWITCH_TIMEOUT timeout;
+    NvU32   val;
+    NvBool error = NV_FALSE;
+    NvU32 sxid;
+    const char *sxid_desc = NULL;
+
+    //
+    // NOTE: MMIO discovery has not been performed so only constant BAR0 offset
+    // addressing can be performed.
+    //
+
+    // BAR0 offset 0 should always contain valid data -- unless it doesn't
+    val = NVSWITCH_OFF_RD32(device, 0);
+    if (val == 0)
+    {
+        error = NV_TRUE;
+        sxid = NVSWITCH_ERR_HW_HOST_FIRMWARE_RECOVERY_MODE;
+        sxid_desc = "Firmware recovery mode";
+    }
+    else if ((val == 0xFFFFFFFF) || ((val & 0xFFFF0000) == 0xBADF0000))
+    {
+        error = NV_TRUE;
+        sxid = NVSWITCH_ERR_HW_HOST_IO_FAILURE;
+        sxid_desc = "IO failure";
+    }
+    else if (!IS_FMODEL(device))
+    {
+        // check if FSP successfully started
+        nvswitch_timeout_create(10 * NVSWITCH_INTERVAL_1SEC_IN_NS, &timeout);
+        do
+        {
+            keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
+
+            val = NVSWITCH_REG_RD32(device, _GFW_GLOBAL, _BOOT_PARTITION_PROGRESS);
+            if (FLD_TEST_DRF(_GFW_GLOBAL, _BOOT_PARTITION_PROGRESS, _VALUE, _SUCCESS, val))
+            {
+                break;
+            }
+
+            nvswitch_os_sleep(1);
+        }
+        while (keepPolling);
+        if (!FLD_TEST_DRF(_GFW_GLOBAL, _BOOT_PARTITION_PROGRESS, _VALUE, _SUCCESS, val))
+        {
+            error = NV_TRUE;
+            sxid = NVSWITCH_ERR_HW_HOST_FIRMWARE_INITIALIZATION_FAILURE;
+            sxid_desc = "Firmware initialization failure";
+        }
+    }
+
+    if (error)
+    {
+        NVSWITCH_RAW_ERROR_LOG_TYPE report = { 0, { 0 } };
+        NVSWITCH_RAW_ERROR_LOG_TYPE report_saw = {0, { 0 }};
+        NvU32 report_idx = 0;
+        NvU32 i;
+
+        val = NVSWITCH_REG_RD32(device, _GFW_GLOBAL, _BOOT_PARTITION_PROGRESS);
+        report.data[report_idx++] = val;
+        NVSWITCH_PRINT(device, ERROR, "%s: -- _GFW_GLOBAL, _BOOT_PARTITION_PROGRESS (0x%x) != _SUCCESS --\n",
+            __FUNCTION__, val);
+
+        for (i = 0; i <= 15; i++)
+        {
+            val = NVSWITCH_OFF_RD32(device,
+                NV_PTOP_UNICAST_SW_DEVICE_BASE_SAW_0 + NV_NVLSAW_SW_SCRATCH(i));
+            report_saw.data[i] = val;
+            NVSWITCH_PRINT(device, ERROR, "%s: -- NV_NVLSAW_SW_SCRATCH(%d) = 0x%08x\n",
+                __FUNCTION__, i, val);
+        }
+
+        for (i = 0; i < NV_PFSP_FALCON_COMMON_SCRATCH_GROUP_2__SIZE_1; i++)
+        {
+            val = NVSWITCH_REG_RD32(device, _PFSP, _FALCON_COMMON_SCRATCH_GROUP_2(i));
+            report.data[report_idx++] = val;
+                NVSWITCH_PRINT(device, ERROR, "%s: -- NV_PFSP_FALCON_COMMON_SCRATCH_GROUP_2(%d) = 0x%08x\n",
+                __FUNCTION__, i, val);
+        }
+
+        // Include useful scratch information for triage
+        NVSWITCH_PRINT_SXID_NO_BBX(device, sxid,
+            "Fatal, %s (0x%x/0x%x, 0x%x, 0x%x, 0x%x/0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", sxid_desc,
+            report.data[0], report.data[1], report.data[2], report.data[3], report.data[4],
+            report_saw.data[0], report_saw.data[1], report_saw.data[12], report_saw.data[14], report_saw.data[15]);
+
+        return -NVL_INITIALIZATION_TOTAL_FAILURE;
+    }
+
+    return NVL_SUCCESS;
+}
+
 NvlStatus
 nvswitch_read_vbios_link_entries_ls10
 (
--- a/src/common/nvswitch/kernel/nvswitch.c
+++ b/src/common/nvswitch/kernel/nvswitch.c
@ -30,6 +30,7 @@
 #include "flcn/haldefs_flcnable_nvswitch.h"
 #include "flcn/flcn_nvswitch.h"
 #include "soe/soe_nvswitch.h"
+#include "soe/soeififr.h"
 #include "nvVer.h"
 #include "nvlink_inband_msg.h"

@ -3475,6 +3476,46 @@ _nvswitch_ctrl_get_inforom_bbx_sxid
    return nvswitch_inforom_bbx_get_sxid(device, params);
 }

+static NvlStatus
+_nvswitch_ctrl_get_inforom_bbx_sys_info
+(
+    nvswitch_device *device,
+    NVSWITCH_GET_SYS_INFO_PARAMS *params
+)
+{
+    return nvswitch_inforom_bbx_get_data(device, RM_SOE_IFR_BBX_GET_SYS_INFO, (void *)params);
+}
+
+static NvlStatus
+_nvswitch_ctrl_get_inforom_bbx_time_info
+(
+    nvswitch_device *device,
+    NVSWITCH_GET_TIME_INFO_PARAMS *params
+)
+{
+    return nvswitch_inforom_bbx_get_data(device, RM_SOE_IFR_BBX_GET_TIME_INFO, (void *)params);
+}
+
+static NvlStatus
+_nvswitch_ctrl_get_inforom_bbx_temp_data
+(
+    nvswitch_device *device,
+    NVSWITCH_GET_TEMP_DATA_PARAMS *params
+)
+{
+    return nvswitch_inforom_bbx_get_data(device, RM_SOE_IFR_BBX_GET_TEMP_DATA, (void *)params);
+}
+
+static NvlStatus
+_nvswitch_ctrl_get_inforom_bbx_temp_samples
+(
+    nvswitch_device *device,
+    NVSWITCH_GET_TEMP_SAMPLES_PARAMS *params
+)
+{
+    return nvswitch_inforom_bbx_get_data(device, RM_SOE_IFR_BBX_GET_TEMP_SAMPLES, (void *)params);
+}
+
 static NvlStatus
 _nvswitch_ctrl_get_nvlink_lp_counters
 (
@ -4666,6 +4707,15 @@ nvswitch_program_l1_scratch_reg
   device->hal.nvswitch_program_l1_scratch_reg(device, linkNumber);
 }

+NvlStatus
+nvswitch_check_io_sanity
+(
+    nvswitch_device *device
+)
+{
+    return device->hal.nvswitch_check_io_sanity(device);
+}
+
 NvlStatus
 nvswitch_launch_ALI
 (
@ -5181,6 +5231,26 @@ nvswitch_lib_ctrl
        NVSWITCH_DEV_CMD_DISPATCH(CTRL_NVSWITCH_GET_POWER,
                _nvswitch_ctrl_therm_read_power,
                NVSWITCH_GET_POWER_PARAMS);
+        NVSWITCH_DEV_CMD_DISPATCH_PRIVILEGED(
+                CTRL_NVSWITCH_GET_SYS_INFO,
+                _nvswitch_ctrl_get_inforom_bbx_sys_info,
+                NVSWITCH_GET_SYS_INFO_PARAMS,
+                osPrivate, flags);
+        NVSWITCH_DEV_CMD_DISPATCH_PRIVILEGED(
+                CTRL_NVSWITCH_GET_TIME_INFO,
+                _nvswitch_ctrl_get_inforom_bbx_time_info,
+                NVSWITCH_GET_TIME_INFO_PARAMS,
+                osPrivate, flags);
+        NVSWITCH_DEV_CMD_DISPATCH_PRIVILEGED(
+                CTRL_NVSWITCH_GET_TEMP_DATA,
+                _nvswitch_ctrl_get_inforom_bbx_temp_data,
+                NVSWITCH_GET_TEMP_DATA_PARAMS,
+                osPrivate, flags);
+        NVSWITCH_DEV_CMD_DISPATCH_PRIVILEGED(
+                CTRL_NVSWITCH_GET_TEMP_SAMPLES,
+                _nvswitch_ctrl_get_inforom_bbx_temp_samples,
+                NVSWITCH_GET_TEMP_SAMPLES_PARAMS,
+                osPrivate, flags);

        default:
            nvswitch_os_print(NVSWITCH_DBG_LEVEL_INFO, "unknown ioctl %x\n", cmd);
--- a/src/common/sdk/nvidia/inc/class/cl90e7.h
+++ b/src/common/sdk/nvidia/inc/class/cl90e7.h
@ -0,0 +1,38 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _cl90e7_h_
+#define _cl90e7_h_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GF100_SUBDEVICE_INFOROM                             (0x000090e7)
+
+#ifdef __cplusplus
+};     /* extern "C" */
+#endif
+
+#endif // _cl90e7_h
+
--- a/src/common/sdk/nvidia/inc/class/clcba2.h
+++ b/src/common/sdk/nvidia/inc/class/clcba2.h
@ -118,6 +118,7 @@ typedef volatile struct _clcba2_tag0 {

 // Class definitions
 #define NVCBA2_DECRYPT_COPY_SIZE_MAX_BYTES                                      (2*1024*1024)
+#define NVCBA2_DECRYPT_SCRUB_SIZE_MAX_BYTES                                     (1024*1024*1024)

 // Errors
 #define NVCBA2_ERROR_NONE                                                       (0x00000000)
@ -149,6 +150,7 @@ typedef volatile struct _clcba2_tag0 {
 #define NVCBA2_ERROR_SCRUBBER_INVALD_ADDRESS                                    (0x0000001a)
 #define NVCBA2_ERROR_SCRUBBER_INSUFFICIENT_PERMISSIONS                          (0x0000001b)
 #define NVCBA2_ERROR_SCRUBBER_MUTEX_ACQUIRE_FAILURE                             (0x0000001c)
+#define NVCBA2_ERROR_SCRUB_SIZE_MAX_EXCEEDED                                    (0x0000001d)

 #ifdef __cplusplus
 };     /* extern "C" */
--- a/src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrl30f1.h
@ -887,10 +887,6 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 *   This parameter is set by the client to indicate the 
 *   gpuId of the GPU to which the display to be optimized
 *   is attached.
- * display
- *   This parameter is not used by RM currently.
- *   Clients can ignore this parameter.  Note that this
- *   parameter will be removed in future.
 * output
 *   This parameter is set by the client to indicate the 
 *   output resource type of the display to be optimized.
@ -1033,6 +1029,12 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 *   optimal pixel clock to use with the adjusted mode, 
 *   in units of Hz. 
 *
+ *
+ * bOptimized[out]
+ *   This is set to NV_TRUE if the timings were successfully optimized, and
+ *   NV_FALSE otherwise.
+ *
+ *
 * Progressive Raster Structure
 *
 *                 hSyncEnd                            hTotal
@ -1145,28 +1147,29 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
 #define NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS_MESSAGE_ID (0x60U)

 typedef struct NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS {
-    NvU32 gpuId;
-    NvU32 display;
-    NvU32 output;
-    NvU32 protocol;
-    NvU32 structure;
-    NvU32 adjust;
-    NvU32 hDeltaStep;
-    NvU32 hDeltaMax;
-    NvU32 vDeltaStep;
-    NvU32 vDeltaMax;
-    NvU32 hSyncEnd;
-    NvU32 hBlankEnd;
-    NvU32 hBlankStart;
-    NvU32 hTotal;
-    NvU32 vSyncEnd;
-    NvU32 vBlankEnd;
-    NvU32 vBlankStart;
-    NvU32 vInterlacedBlankEnd;
-    NvU32 vInterlacedBlankStart;
-    NvU32 vTotal;
-    NvU32 refreshX10K;
-    NvU32 pixelClockHz;
+    NvU32  gpuId;
+    NvU32  output;
+    NvU32  protocol;
+    NvU32  structure;
+    NvU32  adjust;
+    NvU32  hDeltaStep;
+    NvU32  hDeltaMax;
+    NvU32  vDeltaStep;
+    NvU32  vDeltaMax;
+    NvU32  hSyncEnd;
+    NvU32  hBlankEnd;
+    NvU32  hBlankStart;
+    NvU32  hTotal;
+    NvU32  vSyncEnd;
+    NvU32  vBlankEnd;
+    NvU32  vBlankStart;
+    NvU32  vInterlacedBlankEnd;
+    NvU32  vInterlacedBlankStart;
+    NvU32  vTotal;
+    NvU32  refreshX10K;
+    NvU32  pixelClockHz;
+
+    NvBool bOptimized;
 } NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS;

 /* output values */
--- a/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrl90e7.h
@ -0,0 +1,72 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <nvtypes.h>
+
+//
+// This file was generated with FINN, an NVIDIA coding tool.
+// Source file:      ctrl/ctrl90e7.finn
+//
+
+#include "nvfixedtypes.h"
+#include "ctrl/ctrlxxxx.h"
+/* GF100_SUBDEVICE_INFOROM control commands and parameters */
+
+#define NV90E7_CTRL_CMD(cat,idx) NVXXXX_CTRL_CMD(0x90E7, NV90E7_CTRL_##cat, idx)
+
+/* Command categories (6 bits) */
+#define NV90E7_CTRL_RESERVED                    (0x00)
+#define NV90E7_CTRL_BBX                         (0x01)
+
+
+
+/*
+ * NV90E7_CTRL_CMD_BBX_GET_LAST_FLUSH_TIME
+ *
+ * This command is used to query the last BBX flush timestamp and duration. If BBX has not yet
+ * been flushed, the status returned is NV_ERR_NOT_READY.
+ *
+ *   timestamp
+ *     This parameter specifies the start timestamp of the last BBX flush.
+ *
+ *   durationUs
+ *     This parameter specifies the duration (us) of the last BBX flush.
+ *
+ * Possible status values returned are:
+ *   NV_OK
+ *   NV_ERR_NOT_READY
+ *   NV_ERR_NOT_SUPPORTED
+ */
+#define NV90E7_CTRL_CMD_BBX_GET_LAST_FLUSH_TIME (0x90e70113) /* finn: Evaluated from "(FINN_GF100_SUBDEVICE_INFOROM_BBX_INTERFACE_ID << 8) | NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS_MESSAGE_ID" */
+
+#define NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS_MESSAGE_ID (0x13U)
+
+typedef struct NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS {
+    NV_DECLARE_ALIGNED(NvU64 timestamp, 8);
+    NvU32 durationUs;
+} NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS;
+
+
+
--- a/src/common/sdk/nvidia/inc/ctrl/ctrlb0cc/ctrlb0ccprofiler.h
+++ b/src/common/sdk/nvidia/inc/ctrl/ctrlb0cc/ctrlb0ccprofiler.h
@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2018-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -556,4 +556,36 @@ typedef struct NVB0CC_CTRL_RELEASE_HES_PARAMS {
    NVB0CC_CTRL_HES_TYPE type;
 } NVB0CC_CTRL_RELEASE_HES_PARAMS;

+/*!
+ * NVB0CC_CTRL_CMD_DISABLE_DYNAMIC_MMA_BOOST
+ * 
+ * Disable the Dynamic MMA clock boost during profiler lifetime.
+ *
+ */
+#define NVB0CC_CTRL_CMD_DISABLE_DYNAMIC_MMA_BOOST (0xb0cc0117) /* finn: Evaluated from "(FINN_MAXWELL_PROFILER_PROFILER_INTERFACE_ID << 8) | NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS_MESSAGE_ID" */
+#define NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS_MESSAGE_ID (0x17U)
+
+typedef struct NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS {
+    /*!
+     * [in]: En/Disable Dynamic MMA Boost.  True = disable Boost; False = re-enable Boost.
+     */
+    NvBool disable;
+} NVB0CC_CTRL_DISABLE_DYNAMIC_MMA_BOOST_PARAMS;
+
+/*!
+ * NVB0CC_CTRL_CMD_GET_DYNAMIC_MMA_BOOST_STATUS
+ * 
+ * Request the Dynamic MMA clock boost feature enablement status.
+ *
+ */
+#define NVB0CC_CTRL_CMD_GET_DYNAMIC_MMA_BOOST_STATUS (0xb0cc0118) /* finn: Evaluated from "(FINN_MAXWELL_PROFILER_PROFILER_INTERFACE_ID << 8) | NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS_MESSAGE_ID" */
+#define NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS_MESSAGE_ID (0x18U)
+
+typedef struct NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS {
+    /*!
+     * [out]: Dynamic MMA Boost status: true = boost enabled/available; False = Boost disabled/unavailable.
+     */
+    NvBool enabled;
+} NVB0CC_CTRL_GET_DYNAMIC_MMA_BOOST_STATUS_PARAMS;
+
 /* _ctrlb0ccprofiler_h_ */
--- a/src/common/sdk/nvidia/inc/g_finn_rm_api.h
+++ b/src/common/sdk/nvidia/inc/g_finn_rm_api.h
@ -558,8 +558,6 @@ typedef FINN_RM_API FINN_GF100_SUBDEVICE_MASTER_MASTER;
 typedef FINN_RM_API FINN_GF100_SUBDEVICE_INFOROM_RESERVED;
 #define FINN_GF100_SUBDEVICE_INFOROM_BBX_INTERFACE_ID (0x90e701U)
 typedef FINN_RM_API FINN_GF100_SUBDEVICE_INFOROM_BBX;
-#define FINN_GF100_SUBDEVICE_INFOROM_RPR_INTERFACE_ID (0x90e702U)
-typedef FINN_RM_API FINN_GF100_SUBDEVICE_INFOROM_RPR;

 #define FINN_GF100_HDACODEC_RESERVED_INTERFACE_ID (0x90ec00U)
 typedef FINN_RM_API FINN_GF100_HDACODEC_RESERVED;
--- a/src/common/sdk/nvidia/inc/nverror.h
+++ b/src/common/sdk/nvidia/inc/nverror.h
@ -122,7 +122,8 @@
 #define ROBUST_CHANNEL_DLA_ERROR                        (138)
 #define ROBUST_CHANNEL_FAST_PATH_ERROR                  (139)
 #define UNRECOVERABLE_ECC_ERROR_ESCAPE                  (140)
-#define ROBUST_CHANNEL_LAST_ERROR                       (UNRECOVERABLE_ECC_ERROR_ESCAPE)
+#define GPU_INIT_ERROR                                  (143)
+#define ROBUST_CHANNEL_LAST_ERROR                       (GPU_INIT_ERROR)


 // Indexed CE reference
--- a/src/common/uproc/os/common/include/libos_v3_crashcat.h
+++ b/src/common/uproc/os/common/include/libos_v3_crashcat.h
@ -0,0 +1,174 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef LIBOS_V3_CRASH_CAT_H
+#define LIBOS_V3_CRASH_CAT_H
+
+#include "nv-crashcat.h"
+#include "nv-crashcat-decoder.h"
+
+// libosv3 implements the CrashCat V1 protocol with the following implementation-defined bits
+
+typedef enum
+{
+    LibosPanicReasonUnspecified           = 0x00,
+    LibosPanicReasonUnhandledState        = 0x01,
+    LibosPanicReasonInvalidConfiguration  = 0x02,
+    LibosPanicReasonFatalHardwareError    = 0x03,
+    LibosPanicReasonInsufficientResources = 0x04,
+    LibosPanicReasonTimeout               = 0x05,
+    LibosPanicReasonEnvCallFailed         = 0x06,
+    LibosPanicReasonAsanMemoryError       = 0x08,
+    LibosPanicReasonProgrammingError      = 0x0a,
+    LibosPanicReasonAssertionFailed       = 0x0b,
+    LibosPanicReasonTrapKernelPanic       = 0x0c,
+    LibosPanicReasonTrapInstruction       = 0x0d,
+    LibosPanicReasonCount
+} LibosPanicReason;
+
+typedef enum{
+    LibosKernelModuleInit          = 0x00,
+    LibosKernelModuleIpi           = 0x01,
+    LibosKernelModuleLoader        = 0x02,
+    LibosKernelModulePartition     = 0x03,
+    LibosKernelModuleSbi           = 0x04,
+    LibosKernelModulePagetable     = 0x05,
+    LibosKernelModuleAddressSpace  = 0x06,
+    LibosKernelModuleIdentity      = 0x07,
+    LibosKernelModuleObjectPool    = 0x08,
+    LibosKernelModulePageState     = 0x09,
+    LibosKernelModuleMemoryPool    = 0x10,
+    LibosKernelModuleBuddyState    = 0x11,
+    LibosKernelModuleServer        = 0x12,
+    LibosKernelModuleDmaDriver     = 0x13,
+    LibosKernelModuleExtIntrDriver = 0x14,
+    LibosKernelModuleSoftMmuCore   = 0x15,
+    LibosKernelModulePagestream    = 0x16,
+    LibosKernelModuleTask          = 0x17,
+    LibosKernelModuleGdmaDriver    = 0x18,
+    LibosKernelModuleCacheDriver   = 0x19,
+    LibosKernelModuleMinTree       = 0x20,
+    LibosKernelModuleList          = 0x21,
+    LibosKernelModuleSpinlock      = 0x22,
+    LibosKernelModuleScheduler     = 0x23,
+    LibosKernelModulePort          = 0x24,
+    LibosKernelModuleGlobalPort    = 0x25,
+    LibosKernelModuleProxyPort     = 0x26,
+    LibosKernelModuleTimer         = 0x27,
+} LibosKernelModule;
+
+// NV_CRASHCAT_REPORT_IMPLEMENTER_SIGNATURE (bits 63:0) - "LIBOS3.1"
+#define NV_CRASHCAT_REPORT_IMPLEMENTER_SIGNATURE_LIBOS3 (0x4C49424F53332E31ull)
+
+// NV_CRASHCAT_REPORT_V1_REPORTER_ID_IMPL_DEF (bits 63:18)
+#define NV_CRASHCAT_REPORT_V1_REPORTER_ID_LIBOS3_TASK_ID              31:24
+#define NV_CRASHCAT_REPORT_V1_REPORTER_ID_LIBOS3_RESERVED             63:32
+#define NV_CRASHCAT_REPORT_V1_REPORTER_ID_LIBOS3_TASK_ID_UNSPECIFIED  (0xFF)
+
+static NV_INLINE
+void crashcatReportV1SetReporterLibos3TaskId(NvCrashCatReport_V1 *pReport, NvU8 task_id)
+{
+    pReport->reporterId = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_REPORTER_ID, _LIBOS3_TASK_ID,
+                                            task_id, pReport->reporterId);
+}
+
+static NV_INLINE
+NvU8 crashcatReportV1ReporterLibos3TaskId(NvCrashCatReport_V1 *pReport)
+{
+    return (NvU8)DRF_VAL64(_CRASHCAT, _REPORT_V1_REPORTER_ID, _LIBOS3_TASK_ID, pReport->reporterId);
+}
+
+// NV_CRASHCAT_REPORT_V1_REPORTER_DATA_VERSION (bits 31:0)
+#define NV_CRASHCAT_REPORT_V1_REPORTER_DATA_VERSION_LIBOS3_CL         23:0
+#define NV_CRASHCAT_REPORT_V1_REPORTER_DATA_VERSION_LIBOS3_MINOR      27:24
+#define NV_CRASHCAT_REPORT_V1_REPORTER_DATA_VERSION_LIBOS3_MAJOR      31:28
+
+static NV_INLINE
+void crashcatReportV1SetReporterVersionLibos3(NvCrashCatReport_V1 *pReport, NvU32 cl)
+{
+    pReport->reporterData = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_REPORTER_DATA,
+                                              _VERSION_LIBOS3_MAJOR, 3, pReport->reporterData);
+    pReport->reporterData = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_REPORTER_DATA,
+                                              _VERSION_LIBOS3_MINOR, 1, pReport->reporterData);
+    pReport->reporterData = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_REPORTER_DATA,
+                                              _VERSION_LIBOS3_CL, cl, pReport->reporterData);
+}
+
+static NV_INLINE
+NvU32 crashcatReportV1ReporterVersionLibos3Cl(NvCrashCatReport_V1 *pReport)
+{
+    return DRF_VAL(_CRASHCAT, _REPORT_V1_REPORTER_DATA_VERSION, _LIBOS3_CL,
+                   crashcatReportV1ReporterVersion(pReport));
+}
+
+static NV_INLINE
+NvU8 crashcatReportV1ReporterVersionLibos3Minor(NvCrashCatReport_V1 *pReport)
+{
+    return (NvU8)DRF_VAL(_CRASHCAT, _REPORT_V1_REPORTER_DATA_VERSION, _LIBOS3_MINOR,
+                         crashcatReportV1ReporterVersion(pReport));
+}
+
+static NV_INLINE
+NvU8 crashcatReportV1ReporterVersionLibos3Major(NvCrashCatReport_V1 *pReport)
+{
+    return (NvU8)DRF_VAL(_CRASHCAT, _REPORT_V1_REPORTER_DATA_VERSION, _LIBOS3_MAJOR,
+                         crashcatReportV1ReporterVersion(pReport));
+}
+
+// NV_CRASHCAT_REPORT_V1_SOURCE_ID_IMPL_DEF (63:18)
+#define NV_CRASHCAT_REPORT_V1_SOURCE_ID_LIBOS3_TASK_ID                31:24
+#define NV_CRASHCAT_REPORT_V1_SOURCE_ID_LIBOS3_RESERVED               63:32
+#define NV_CRASHCAT_REPORT_V1_SOURCE_ID_LIBOS3_TASK_ID_UNSPECIFIED    (0xFF)
+
+static NV_INLINE
+void crashcatReportV1SetSourceLibos3TaskId(NvCrashCatReport_V1 *pReport, NvU8 task_id)
+{
+    pReport->sourceId = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_SOURCE_ID, _LIBOS3_TASK_ID, task_id,
+                                          pReport->sourceId);
+}
+
+static NV_INLINE
+NvU8 crashcatReportV1SourceLibos3TaskId(NvCrashCatReport_V1 *pReport)
+{
+    return (NvU8)DRF_VAL64(_CRASHCAT, _REPORT_V1_SOURCE_ID, _LIBOS3_TASK_ID, pReport->sourceId);
+}
+
+// NV_CRASHCAT_REPORT_V1_SOURCE_CAUSE_IMPL_DEF (63:32)
+#define NV_CRASHCAT_REPORT_V1_SOURCE_CAUSE_LIBOS3_REASON              39:32
+#define NV_CRASHCAT_REPORT_V1_SOURCE_CAUSE_LIBOS3_RESERVED            63:40
+
+static NV_INLINE
+void crashcatReportV1SetSourceCauseLibos3Reason(NvCrashCatReport_V1 *pReport,
+                                                LibosPanicReason reason)
+{
+    pReport->sourceCause = FLD_SET_DRF_NUM64(_CRASHCAT, _REPORT_V1_SOURCE_CAUSE, _LIBOS3_REASON,
+                                             reason, pReport->sourceCause);
+}
+
+static NV_INLINE
+LibosPanicReason crashcatReportV1SourceCauseLibos3Reason(NvCrashCatReport_V1 *pReport)
+{
+    return (LibosPanicReason)DRF_VAL64(_CRASHCAT, _REPORT_V1_SOURCE_CAUSE, _LIBOS3_REASON,
+                                       pReport->sourceCause);
+}
+
+#endif // LIBOS_V3_CRASH_CAT_H
--- a/src/nvidia-modeset/src/nvkms-evo.c
+++ b/src/nvidia-modeset/src/nvkms-evo.c
@ -734,6 +734,9 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
            return;
    }

+    nvEvoLogInfoString(pInfoString,
+            "Adjusting Mode Timings for Quadro Sync Compatibility");
+
    ret = nvRmApiControl(nvEvoGlobal.clientHandle,
                         pDispEvo->pFrameLockEvo->device,
                         NV30F1_CTRL_CMD_GSYNC_GET_OPTIMIZED_TIMING,
@ -744,12 +747,13 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
        nvAssert(!"Failed to convert to Quadro Sync safe timing");
        /* do not apply the timings returned by RM if the call failed */
        return;
+    } else if (!gsyncOptTimingParams.bOptimized) {
+        nvEvoLogInfoString(pInfoString, " Timings Unchanged.");
+        return;
    }

    nvConstructNvModeTimingsFromHwModeTimings(pTimings, &modeTimings);

-    nvEvoLogInfoString(pInfoString,
-            "Adjusting Mode Timings for Quadro Sync Compatibility");
    nvEvoLogInfoString(pInfoString, " Old Timings:");
    nvEvoLogModeValidationModeTimings(pInfoString, &modeTimings);

@ -5263,13 +5267,6 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
                                  NVKMS_MODE_VALIDATION_REQUIRE_BOOT_CLOCKS);
    NvU32 ret;

-    /* bypass this checking if the user disabled IMP */
-
-    if ((pParams->overrides &
-         NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
-        return TRUE;
-    }
-
    activeRmId = nvRmAllocDisplayId(pConnectorEvo->pDispEvo,
                    nvAddDpyIdToEmptyDpyIdList(pConnectorEvo->displayId));
    if (activeRmId == 0x0) {
@ -5294,11 +5291,18 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
        timingsParams[head].pUsage = &timings[head].viewPort.guaranteedUsage;
    }

-    ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
-                                        requireBootClocks,
-                                        NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
-                                        /* downgradePossibleHeadsBitMask */
-                                        (NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
+    /* bypass this checking if the user disabled IMP */
+    if ((pParams->overrides &
+         NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
+        ret = TRUE;
+    } else {
+        ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
+                                            requireBootClocks,
+                                            NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
+                                            /* downgradePossibleHeadsBitMask */
+                                            (NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
+    }
+
    if (ret) {
        *pNumHeads = numHeads;
    } else {
--- a/src/nvidia/arch/nvalloc/unix/src/os.c
+++ b/src/nvidia/arch/nvalloc/unix/src/os.c
@ -404,36 +404,33 @@ static NV_STATUS setNumaPrivData
    NV_STATUS rmStatus = NV_OK;
    void *pAllocPrivate = NULL;
    NvU64 *addrArray = NULL;
-    NvU64 numOsPages = pMemDesc->PageCount;
+    NvU64 numPages = pMemDesc->PageCount;
+    NvU64 i;

-    addrArray = pMemDesc->_pteArray;
+    addrArray = portMemAllocNonPaged(numPages * sizeof(NvU64));
+    if (addrArray == NULL)
+    {
+        return NV_ERR_NO_MEMORY;
+    }
+
+    portMemCopy((void*)addrArray,
+                (numPages * sizeof(NvU64)),
+                (void*)memdescGetPteArray(pMemDesc, AT_CPU),
+                (numPages * sizeof(NvU64)));

    if (NV_RM_PAGE_SIZE < os_page_size)
    {
-        NvU64 numPages;
-        NvU64 i;
-
-        numPages = pMemDesc->PageCount;
-        addrArray = portMemAllocNonPaged(numPages * sizeof(NvU64));
-        if (addrArray == NULL)
-        {
-            return NV_ERR_NO_MEMORY;
-        }
-
-        portMemCopy((void*)addrArray,
-                    (numPages * sizeof(NvU64)), (void*)pMemDesc->_pteArray,
-                    (numPages * sizeof(NvU64)));
        RmDeflateRmToOsPageArray(addrArray, numPages);
-        numOsPages = NV_RM_PAGES_TO_OS_PAGES(numPages);
-
-        for (i = 0; i < numOsPages; i++)
-        {
-            // Update GPA to system physical address
-            addrArray[i] += pKernelMemorySystem->coherentCpuFbBase;
-        }
+        numPages = NV_RM_PAGES_TO_OS_PAGES(numPages);
    }

-    rmStatus = nv_register_phys_pages(nv, addrArray, numOsPages, NV_MEMORY_CACHED, &pAllocPrivate);
+    for (i = 0; i < numPages; i++)
+    {
+        // Update GPA to system physical address
+        addrArray[i] += pKernelMemorySystem->coherentCpuFbBase;
+    }
+
+    rmStatus = nv_register_phys_pages(nv, addrArray, numPages, NV_MEMORY_CACHED, &pAllocPrivate);
    if (rmStatus != NV_OK)
    {
        goto errors;
@ -442,10 +439,7 @@ static NV_STATUS setNumaPrivData
    memdescSetMemData(pMemDesc, pAllocPrivate, NULL);

 errors:
-    if (NV_RM_PAGE_SIZE < os_page_size)
-    {
-        portMemFree(addrArray);
-    }
+    portMemFree(addrArray);

    return rmStatus;
 }
--- a/src/nvidia/generated/g_allclasses.h
+++ b/src/nvidia/generated/g_allclasses.h
@ -116,6 +116,7 @@
 #include <class/clb0cc.h>               // MAXWELL_PROFILER
 #include <class/clb2cc.h>               // MAXWELL_PROFILER_DEVICE
 #include <class/cl90e6.h>               // GF100_SUBDEVICE_MASTER
+#include <class/cl90e7.h>               // GF100_SUBDEVICE_INFOROM
 #include <class/cl9096.h>               // GF100_ZBC_CLEAR
 #include <class/cl9072.h>               // GF100_DISP_SW
 #include <class/cl9074.h>               // GF100_TIMED_SEMAPHORE_SW
@ -585,6 +586,10 @@
 #define GF100_SUBDEVICE_MASTER                   (0x000090e6)
 #endif

+#ifndef GF100_SUBDEVICE_INFOROM
+#define GF100_SUBDEVICE_INFOROM                  (0x000090e7)
+#endif
+
 #ifndef GF100_ZBC_CLEAR
 #define GF100_ZBC_CLEAR                          (0x00009096)
 #endif
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveConcatenatedFMC_GH100.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveConcatenatedFMC_GH100.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_AD102.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_AD102.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_GA102.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_GA102.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_GH100.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmBoot_GH100.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmCcFmcGfwProdSigned_GH100.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmCcFmcGfwProdSigned_GH100.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmFmcGfwDebugSigned_GH100.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmFmcGfwDebugSigned_GH100.c
--- a/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmFmcGfwProdSigned_GH100.c
+++ b/src/nvidia/generated/g_bindata_kgspGetBinArchiveGspRmFmcGfwProdSigned_GH100.c
--- a/src/nvidia/generated/g_client_resource_nvoc.c
+++ b/src/nvidia/generated/g_client_resource_nvoc.c
@ -351,12 +351,12 @@ static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_RmClient
 #endif
    },
    {               /*  [10] */
-#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
        /*pFunc=*/      (void (*)(void)) NULL,
 #else
        /*pFunc=*/      (void (*)(void)) cliresCtrlCmdSystemGetP2pCaps_IMPL,
-#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
-        /*flags=*/      0x811u,
+#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
+        /*flags=*/      0x810u,
        /*accessRight=*/0x0u,
        /*methodId=*/   0x127u,
        /*paramSize=*/  sizeof(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_PARAMS),
@ -381,12 +381,12 @@ static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_RmClient
 #endif
    },
    {               /*  [12] */
-#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
        /*pFunc=*/      (void (*)(void)) NULL,
 #else
        /*pFunc=*/      (void (*)(void)) cliresCtrlCmdSystemGetP2pCapsV2_IMPL,
-#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
-        /*flags=*/      0x811u,
+#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
+        /*flags=*/      0x810u,
        /*accessRight=*/0x0u,
        /*methodId=*/   0x12bu,
        /*paramSize=*/  sizeof(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_V2_PARAMS),
@ -576,12 +576,12 @@ static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_RmClient
 #endif
    },
    {               /*  [25] */
-#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
        /*pFunc=*/      (void (*)(void)) NULL,
 #else
        /*pFunc=*/      (void (*)(void)) cliresCtrlCmdSystemGetP2pCapsMatrix_IMPL,
-#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
-        /*flags=*/      0x811u,
+#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
+        /*flags=*/      0x810u,
        /*accessRight=*/0x0u,
        /*methodId=*/   0x13au,
        /*paramSize=*/  sizeof(NV0000_CTRL_SYSTEM_GET_P2P_CAPS_MATRIX_PARAMS),
@ -1801,15 +1801,15 @@ static void __nvoc_init_funcTable_RmClientResource_1(RmClientResource *pThis) {
    pThis->__cliresCtrlCmdSystemGetHwbcInfo__ = &cliresCtrlCmdSystemGetHwbcInfo_IMPL;
 #endif

-#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
    pThis->__cliresCtrlCmdSystemGetP2pCaps__ = &cliresCtrlCmdSystemGetP2pCaps_IMPL;
 #endif

-#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
    pThis->__cliresCtrlCmdSystemGetP2pCapsV2__ = &cliresCtrlCmdSystemGetP2pCapsV2_IMPL;
 #endif

-#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x811u)
+#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x810u)
    pThis->__cliresCtrlCmdSystemGetP2pCapsMatrix__ = &cliresCtrlCmdSystemGetP2pCapsMatrix_IMPL;
 #endif

--- a/src/nvidia/generated/g_crashcat_report_nvoc.c
+++ b/src/nvidia/generated/g_crashcat_report_nvoc.c
@ -125,24 +125,32 @@ static void __nvoc_init_funcTable_CrashCatReport_1(CrashCatReport *pThis) {
    PORT_UNREFERENCED_VARIABLE(reportHal_HalVarIdx);

    // Hal function -- crashcatReportLogReporter
-    if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* CrashCatReportHal: V1_GENERIC */ 
-    {
-        pThis->__crashcatReportLogReporter__ = &crashcatReportLogReporter_V1_GENERIC;
-    }
-    else
+    if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* CrashCatReportHal: V1_LIBOS2 */ 
    {
        pThis->__crashcatReportLogReporter__ = &crashcatReportLogReporter_V1_LIBOS2;
    }
-
-    // Hal function -- crashcatReportLogSource
-    if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* CrashCatReportHal: V1_GENERIC */ 
+    else if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000004UL) )) /* CrashCatReportHal: V1_LIBOS3 */ 
    {
-        pThis->__crashcatReportLogSource__ = &crashcatReportLogSource_V1_GENERIC;
+        pThis->__crashcatReportLogReporter__ = &crashcatReportLogReporter_V1_LIBOS3;
    }
    else
+    {
+        pThis->__crashcatReportLogReporter__ = &crashcatReportLogReporter_V1_GENERIC;
+    }
+
+    // Hal function -- crashcatReportLogSource
+    if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* CrashCatReportHal: V1_LIBOS2 */ 
    {
        pThis->__crashcatReportLogSource__ = &crashcatReportLogSource_V1_LIBOS2;
    }
+    else if (( ((reportHal_HalVarIdx >> 5) == 0UL) && ((1UL << (reportHal_HalVarIdx & 0x1f)) & 0x00000004UL) )) /* CrashCatReportHal: V1_LIBOS3 */ 
+    {
+        pThis->__crashcatReportLogSource__ = &crashcatReportLogSource_V1_LIBOS3;
+    }
+    else
+    {
+        pThis->__crashcatReportLogSource__ = &crashcatReportLogSource_V1_GENERIC;
+    }
 }

 void __nvoc_init_funcTable_CrashCatReport(CrashCatReport *pThis) {
--- a/src/nvidia/generated/g_crashcat_report_nvoc.h
+++ b/src/nvidia/generated/g_crashcat_report_nvoc.h
@ -71,7 +71,8 @@ struct __nvoc_inner_struc_CrashCatReport_1__ {
    NvCrashCatReport_V1 report;
    NvCrashCatRiscv64CsrState_V1 riscv64CsrState;
    NvCrashCatRiscv64GprState_V1 riscv64GprState;
-    NvCrashCatRiscv64Trace_V1 *pRiscv64Trace;
+    NvCrashCatRiscv64Trace_V1 *pRiscv64StackTrace;
+    NvCrashCatRiscv64Trace_V1 *pRiscv64PcTrace;
    NvCrashCatIo32State_V1 *pIo32State;
 };

@ -236,18 +237,18 @@ static inline void crashcatReportLogRiscv64GprState(struct CrashCatReport *arg0)

 #define crashcatReportLogRiscv64GprState_HAL(arg0) crashcatReportLogRiscv64GprState(arg0)

-void crashcatReportLogRiscv64Trace_V1(struct CrashCatReport *arg0);
+void crashcatReportLogRiscv64Traces_V1(struct CrashCatReport *arg0);


 #ifdef __nvoc_crashcat_report_h_disabled
-static inline void crashcatReportLogRiscv64Trace(struct CrashCatReport *arg0) {
+static inline void crashcatReportLogRiscv64Traces(struct CrashCatReport *arg0) {
    NV_ASSERT_FAILED_PRECOMP("CrashCatReport was disabled!");
 }
 #else //__nvoc_crashcat_report_h_disabled
-#define crashcatReportLogRiscv64Trace(arg0) crashcatReportLogRiscv64Trace_V1(arg0)
+#define crashcatReportLogRiscv64Traces(arg0) crashcatReportLogRiscv64Traces_V1(arg0)
 #endif //__nvoc_crashcat_report_h_disabled

-#define crashcatReportLogRiscv64Trace_HAL(arg0) crashcatReportLogRiscv64Trace(arg0)
+#define crashcatReportLogRiscv64Traces_HAL(arg0) crashcatReportLogRiscv64Traces(arg0)

 void crashcatReportLogIo32State_V1(struct CrashCatReport *arg0);

@ -266,6 +267,8 @@ void crashcatReportLogReporter_V1_GENERIC(struct CrashCatReport *arg0);

 void crashcatReportLogReporter_V1_LIBOS2(struct CrashCatReport *arg0);

+void crashcatReportLogReporter_V1_LIBOS3(struct CrashCatReport *arg0);
+
 static inline void crashcatReportLogReporter_DISPATCH(struct CrashCatReport *arg0) {
    arg0->__crashcatReportLogReporter__(arg0);
 }
@ -274,6 +277,8 @@ void crashcatReportLogSource_V1_GENERIC(struct CrashCatReport *arg0);

 void crashcatReportLogSource_V1_LIBOS2(struct CrashCatReport *arg0);

+void crashcatReportLogSource_V1_LIBOS3(struct CrashCatReport *arg0);
+
 static inline void crashcatReportLogSource_DISPATCH(struct CrashCatReport *arg0) {
    arg0->__crashcatReportLogSource__(arg0);
 }
@ -347,8 +352,8 @@ void NVOC_PRIVATE_FUNCTION(crashcatReportLogRiscv64GprState)(struct CrashCatRepo
 #endif //__nvoc_crashcat_report_h_disabled

 #ifndef __nvoc_crashcat_report_h_disabled
-#undef crashcatReportLogRiscv64Trace
-void NVOC_PRIVATE_FUNCTION(crashcatReportLogRiscv64Trace)(struct CrashCatReport *arg0);
+#undef crashcatReportLogRiscv64Traces
+void NVOC_PRIVATE_FUNCTION(crashcatReportLogRiscv64Traces)(struct CrashCatReport *arg0);
 #endif //__nvoc_crashcat_report_h_disabled

 #ifndef __nvoc_crashcat_report_h_disabled
--- a/src/nvidia/generated/g_generic_engine_nvoc.c
+++ b/src/nvidia/generated/g_generic_engine_nvoc.c
@ -227,6 +227,21 @@ static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_GenericE
        /*pClassInfo=*/ &(__nvoc_class_def_GenericEngineApi.classInfo),
 #if NV_PRINTF_STRINGS_ALLOWED
        /*func=*/       "genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask"
+#endif
+    },
+    {               /*  [2] */
+#if NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x250u)
+        /*pFunc=*/      (void (*)(void)) NULL,
+#else
+        /*pFunc=*/      (void (*)(void)) genapiCtrlCmdBBXGetLastFlushTime_IMPL,
+#endif // NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x250u)
+        /*flags=*/      0x250u,
+        /*accessRight=*/0x0u,
+        /*methodId=*/   0x90e70113u,
+        /*paramSize=*/  sizeof(NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS),
+        /*pClassInfo=*/ &(__nvoc_class_def_GenericEngineApi.classInfo),
+#if NV_PRINTF_STRINGS_ALLOWED
+        /*func=*/       "genapiCtrlCmdBBXGetLastFlushTime"
 #endif
    },

@ -234,7 +249,7 @@ static const struct NVOC_EXPORTED_METHOD_DEF __nvoc_exported_method_def_GenericE

 const struct NVOC_EXPORT_INFO __nvoc_export_info_GenericEngineApi = 
 {
-    /*numEntries=*/     2,
+    /*numEntries=*/     3,
    /*pExportEntries=*/ __nvoc_exported_method_def_GenericEngineApi
 };

@ -285,6 +300,10 @@ static void __nvoc_init_funcTable_GenericEngineApi_1(GenericEngineApi *pThis) {
    pThis->__genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask__ = &genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask_IMPL;
 #endif

+#if !NVOC_EXPORTED_METHOD_DISABLED_BY_FLAG(0x250u)
+    pThis->__genapiCtrlCmdBBXGetLastFlushTime__ = &genapiCtrlCmdBBXGetLastFlushTime_IMPL;
+#endif
+
    pThis->__nvoc_base_GpuResource.__gpuresMap__ = &__nvoc_thunk_GenericEngineApi_gpuresMap;

    pThis->__nvoc_base_GpuResource.__gpuresGetMapAddrSpace__ = &__nvoc_thunk_GenericEngineApi_gpuresGetMapAddrSpace;
--- a/src/nvidia/generated/g_generic_engine_nvoc.h
+++ b/src/nvidia/generated/g_generic_engine_nvoc.h
@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -35,6 +35,7 @@ extern "C" {

 #include "gpu/gpu_resource.h"
 #include "ctrl/ctrl90e6.h"
+#include "ctrl/ctrl90e7.h"
 #include "rmapi/resource.h" // for macro RMCTRL_EXPORT etc.

 /*!
@ -61,6 +62,7 @@ struct GenericEngineApi {
    NV_STATUS (*__genapiControl__)(struct GenericEngineApi *, struct CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
    NV_STATUS (*__genapiCtrlCmdMasterGetErrorIntrOffsetMask__)(struct GenericEngineApi *, NV90E6_CTRL_MASTER_GET_ERROR_INTR_OFFSET_MASK_PARAMS *);
    NV_STATUS (*__genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask__)(struct GenericEngineApi *, NV90E6_CTRL_MASTER_GET_VIRTUAL_FUNCTION_ERROR_CONT_INTR_MASK_PARAMS *);
+    NV_STATUS (*__genapiCtrlCmdBBXGetLastFlushTime__)(struct GenericEngineApi *, NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS *);
    NvBool (*__genapiShareCallback__)(struct GenericEngineApi *, struct RsClient *, struct RsResourceRef *, RS_SHARE_POLICY *);
    NV_STATUS (*__genapiCheckMemInterUnmap__)(struct GenericEngineApi *, NvBool);
    NV_STATUS (*__genapiMapTo__)(struct GenericEngineApi *, RS_RES_MAP_TO_PARAMS *);
@ -118,6 +120,7 @@ NV_STATUS __nvoc_objCreate_GenericEngineApi(GenericEngineApi**, Dynamic*, NvU32,
 #define genapiControl(pGenericEngineApi, pCallContext, pParams) genapiControl_DISPATCH(pGenericEngineApi, pCallContext, pParams)
 #define genapiCtrlCmdMasterGetErrorIntrOffsetMask(pGenericEngineApi, pParams) genapiCtrlCmdMasterGetErrorIntrOffsetMask_DISPATCH(pGenericEngineApi, pParams)
 #define genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask(pGenericEngineApi, pParams) genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask_DISPATCH(pGenericEngineApi, pParams)
+#define genapiCtrlCmdBBXGetLastFlushTime(pGenericEngineApi, pParams) genapiCtrlCmdBBXGetLastFlushTime_DISPATCH(pGenericEngineApi, pParams)
 #define genapiShareCallback(pGpuResource, pInvokingClient, pParentRef, pSharePolicy) genapiShareCallback_DISPATCH(pGpuResource, pInvokingClient, pParentRef, pSharePolicy)
 #define genapiCheckMemInterUnmap(pRmResource, bSubdeviceHandleProvided) genapiCheckMemInterUnmap_DISPATCH(pRmResource, bSubdeviceHandleProvided)
 #define genapiMapTo(pResource, pParams) genapiMapTo_DISPATCH(pResource, pParams)
@ -170,6 +173,12 @@ static inline NV_STATUS genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask_D
    return pGenericEngineApi->__genapiCtrlCmdMasterGetVirtualFunctionErrorContIntrMask__(pGenericEngineApi, pParams);
 }

+NV_STATUS genapiCtrlCmdBBXGetLastFlushTime_IMPL(struct GenericEngineApi *pGenericEngineApi, NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS *pParams);
+
+static inline NV_STATUS genapiCtrlCmdBBXGetLastFlushTime_DISPATCH(struct GenericEngineApi *pGenericEngineApi, NV90E7_CTRL_BBX_GET_LAST_FLUSH_TIME_PARAMS *pParams) {
+    return pGenericEngineApi->__genapiCtrlCmdBBXGetLastFlushTime__(pGenericEngineApi, pParams);
+}
+
 static inline NvBool genapiShareCallback_DISPATCH(struct GenericEngineApi *pGpuResource, struct RsClient *pInvokingClient, struct RsResourceRef *pParentRef, RS_SHARE_POLICY *pSharePolicy) {
    return pGpuResource->__genapiShareCallback__(pGpuResource, pInvokingClient, pParentRef, pSharePolicy);
 }
--- a/src/nvidia/generated/g_gpu_class_list.c
+++ b/src/nvidia/generated/g_gpu_class_list.c
@ -39,6 +39,7 @@ gpuGetClassDescriptorList_TU102(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -103,6 +104,7 @@ gpuGetClassDescriptorList_TU104(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -168,6 +170,7 @@ gpuGetClassDescriptorList_TU106(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -234,6 +237,7 @@ gpuGetClassDescriptorList_TU116(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -298,6 +302,7 @@ gpuGetClassDescriptorList_TU117(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -381,6 +386,7 @@ gpuGetClassDescriptorList_GA100(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { FERMI_TWOD_A, ENG_GR(0) },
        { FERMI_VASPACE_A, ENG_DMA },
        { G84_PERFBUFFER, ENG_BUS },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -444,6 +450,7 @@ gpuGetClassDescriptorList_GA102(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -514,6 +521,7 @@ gpuGetClassDescriptorList_GA103(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -584,6 +592,7 @@ gpuGetClassDescriptorList_GA104(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -654,6 +663,7 @@ gpuGetClassDescriptorList_GA106(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -724,6 +734,7 @@ gpuGetClassDescriptorList_GA107(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -794,6 +805,7 @@ gpuGetClassDescriptorList_AD102(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -872,6 +884,7 @@ gpuGetClassDescriptorList_AD103(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -950,6 +963,7 @@ gpuGetClassDescriptorList_AD104(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -1028,6 +1042,7 @@ gpuGetClassDescriptorList_AD106(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -1106,6 +1121,7 @@ gpuGetClassDescriptorList_AD107(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { G84_PERFBUFFER, ENG_BUS },
        { GF100_DISP_SW, ENG_SW },
        { GF100_HDACODEC, ENG_HDACODEC },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
@ -1175,6 +1191,7 @@ gpuGetClassDescriptorList_GH100(POBJGPU pGpu, NvU32 *pNumClassDescriptors)
        { FERMI_TWOD_A, ENG_GR(0) },
        { FERMI_VASPACE_A, ENG_DMA },
        { G84_PERFBUFFER, ENG_BUS },
+        { GF100_SUBDEVICE_INFOROM, ENG_GPU },
        { GF100_SUBDEVICE_MASTER, ENG_GPU },
        { GF100_TIMED_SEMAPHORE_SW, ENG_SW },
        { GF100_ZBC_CLEAR, ENG_KERNEL_MEMORY_SYSTEM },
--- a/src/nvidia/generated/g_gpu_nvoc.c
+++ b/src/nvidia/generated/g_gpu_nvoc.c
@ -632,6 +632,17 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
        pThis->__gpuClearFbhubPoisonIntrForBug2924523__ = &gpuClearFbhubPoisonIntrForBug2924523_56cd7a;
    }

+    // Hal function -- gpuCheckIfFbhubPoisonIntrPending
+    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000400UL) )) /* ChipHal: GA100 | GH100 */ 
+    {
+        pThis->__gpuCheckIfFbhubPoisonIntrPending__ = &gpuCheckIfFbhubPoisonIntrPending_GA100;
+    }
+    // default
+    else
+    {
+        pThis->__gpuCheckIfFbhubPoisonIntrPending__ = &gpuCheckIfFbhubPoisonIntrPending_491d52;
+    }
+
    // Hal function -- gpuReadDeviceId
    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */ 
    {
@ -732,6 +743,28 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
    {
        pThis->__gpuIsCtxBufAllocInPmaSupported__ = &gpuIsCtxBufAllocInPmaSupported_491d52;
    }
+
+    // Hal function -- gpuCheckEccCounts
+    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */ 
+    {
+        pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_TU102;
+    }
+    // default
+    else
+    {
+        pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_d44104;
+    }
+
+    // Hal function -- gpuClearEccCounts
+    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */ 
+    {
+        pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_TU102;
+    }
+    // default
+    else
+    {
+        pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_ac1694;
+    }
 }

 void __nvoc_init_funcTable_OBJGPU(OBJGPU *pThis) {
--- a/src/nvidia/generated/g_gpu_nvoc.h
+++ b/src/nvidia/generated/g_gpu_nvoc.h
@ -886,6 +886,7 @@ struct OBJGPU {
    NvU32 (*__gpuGetPhysAddrWidth__)(struct OBJGPU *, NV_ADDRESS_SPACE);
    NvBool (*__gpuFuseSupportsDisplay__)(struct OBJGPU *);
    NV_STATUS (*__gpuClearFbhubPoisonIntrForBug2924523__)(struct OBJGPU *);
+    NvBool (*__gpuCheckIfFbhubPoisonIntrPending__)(struct OBJGPU *);
    void (*__gpuReadDeviceId__)(struct OBJGPU *, NvU32 *, NvU32 *);
    NvU64 (*__gpuGetFlaVasSize__)(struct OBJGPU *, NvBool);
    void (*__gpuDetermineSelfHostedMode__)(struct OBJGPU *);
@ -895,6 +896,8 @@ struct OBJGPU {
    NvBool (*__gpuIsCCEnabledInHw__)(struct OBJGPU *);
    NvBool (*__gpuIsDevModeEnabledInHw__)(struct OBJGPU *);
    NvBool (*__gpuIsCtxBufAllocInPmaSupported__)(struct OBJGPU *);
+    void (*__gpuCheckEccCounts__)(struct OBJGPU *);
+    NV_STATUS (*__gpuClearEccCounts__)(struct OBJGPU *);
    NvBool PDB_PROP_GPU_HIGH_SPEED_BRIDGE_CONNECTED;
    NvBool bVideoLinkDisabled;
    GPU_FABRIC_PROBE_INFO_KERNEL *pGpuFabricProbeInfoKernel;
@ -1449,6 +1452,8 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
 #define gpuFuseSupportsDisplay_HAL(pGpu) gpuFuseSupportsDisplay_DISPATCH(pGpu)
 #define gpuClearFbhubPoisonIntrForBug2924523(pGpu) gpuClearFbhubPoisonIntrForBug2924523_DISPATCH(pGpu)
 #define gpuClearFbhubPoisonIntrForBug2924523_HAL(pGpu) gpuClearFbhubPoisonIntrForBug2924523_DISPATCH(pGpu)
+#define gpuCheckIfFbhubPoisonIntrPending(pGpu) gpuCheckIfFbhubPoisonIntrPending_DISPATCH(pGpu)
+#define gpuCheckIfFbhubPoisonIntrPending_HAL(pGpu) gpuCheckIfFbhubPoisonIntrPending_DISPATCH(pGpu)
 #define gpuReadDeviceId(pGpu, arg0, arg1) gpuReadDeviceId_DISPATCH(pGpu, arg0, arg1)
 #define gpuReadDeviceId_HAL(pGpu, arg0, arg1) gpuReadDeviceId_DISPATCH(pGpu, arg0, arg1)
 #define gpuGetFlaVasSize(pGpu, bNvswitchVirtualization) gpuGetFlaVasSize_DISPATCH(pGpu, bNvswitchVirtualization)
@ -1467,6 +1472,10 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
 #define gpuIsDevModeEnabledInHw_HAL(pGpu) gpuIsDevModeEnabledInHw_DISPATCH(pGpu)
 #define gpuIsCtxBufAllocInPmaSupported(pGpu) gpuIsCtxBufAllocInPmaSupported_DISPATCH(pGpu)
 #define gpuIsCtxBufAllocInPmaSupported_HAL(pGpu) gpuIsCtxBufAllocInPmaSupported_DISPATCH(pGpu)
+#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
+#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
+#define gpuClearEccCounts(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
+#define gpuClearEccCounts_HAL(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
 static inline NV_STATUS gpuConstructPhysical_56cd7a(struct OBJGPU *pGpu) {
    return NV_OK;
 }
@ -3095,6 +3104,16 @@ static inline NV_STATUS gpuClearFbhubPoisonIntrForBug2924523_DISPATCH(struct OBJ
    return pGpu->__gpuClearFbhubPoisonIntrForBug2924523__(pGpu);
 }

+NvBool gpuCheckIfFbhubPoisonIntrPending_GA100(struct OBJGPU *pGpu);
+
+static inline NvBool gpuCheckIfFbhubPoisonIntrPending_491d52(struct OBJGPU *pGpu) {
+    return ((NvBool)(0 != 0));
+}
+
+static inline NvBool gpuCheckIfFbhubPoisonIntrPending_DISPATCH(struct OBJGPU *pGpu) {
+    return pGpu->__gpuCheckIfFbhubPoisonIntrPending__(pGpu);
+}
+
 void gpuReadDeviceId_GM107(struct OBJGPU *pGpu, NvU32 *arg0, NvU32 *arg1);

 void gpuReadDeviceId_GH100(struct OBJGPU *pGpu, NvU32 *arg0, NvU32 *arg1);
@ -3189,6 +3208,26 @@ static inline NvBool gpuIsCtxBufAllocInPmaSupported_DISPATCH(struct OBJGPU *pGpu
    return pGpu->__gpuIsCtxBufAllocInPmaSupported__(pGpu);
 }

+static inline void gpuCheckEccCounts_d44104(struct OBJGPU *pGpu) {
+    return;
+}
+
+void gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);
+
+static inline void gpuCheckEccCounts_DISPATCH(struct OBJGPU *pGpu) {
+    pGpu->__gpuCheckEccCounts__(pGpu);
+}
+
+static inline NV_STATUS gpuClearEccCounts_ac1694(struct OBJGPU *pGpu) {
+    return NV_OK;
+}
+
+NV_STATUS gpuClearEccCounts_TU102(struct OBJGPU *pGpu);
+
+static inline NV_STATUS gpuClearEccCounts_DISPATCH(struct OBJGPU *pGpu) {
+    return pGpu->__gpuClearEccCounts__(pGpu);
+}
+
 static inline PENGDESCRIPTOR gpuGetInitEngineDescriptors(struct OBJGPU *pGpu) {
    return pGpu->engineOrder.pEngineInitDescriptors;
 }
--- a/src/nvidia/generated/g_kern_bus_nvoc.c
+++ b/src/nvidia/generated/g_kern_bus_nvoc.c
@ -892,6 +892,28 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
        pThis->__kbusTeardownCoherentCpuMapping__ = &kbusTeardownCoherentCpuMapping_d44104;
    }

+    // Hal function -- kbusGetEccCounts
+    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */ 
+    {
+        pThis->__kbusGetEccCounts__ = &kbusGetEccCounts_GH100;
+    }
+    // default
+    else
+    {
+        pThis->__kbusGetEccCounts__ = &kbusGetEccCounts_4a4dee;
+    }
+
+    // Hal function -- kbusClearEccCounts
+    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */ 
+    {
+        pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_GH100;
+    }
+    // default
+    else
+    {
+        pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_b3696a;
+    }
+
    pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBus_engstateConstructEngine;

    pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelBus_engstateStatePreInitLocked;
--- a/Show More
+++ b/Show More