520.56.06

parent 90eb10774f
commit 7c345b838b
@@ -2,6 +2,12 @@

## Release 520 Entries

### [520.56.06] 2022-10-12

#### Added

- Introduce support for GeForce RTX 4090 GPUs.

### [520.61.05] 2022-10-10

#### Added
README.md
@@ -1,7 +1,7 @@

# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.61.05.
version 520.56.06.


## How to Build

@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with gsp.bin
firmware and user-space NVIDIA GPU driver components from a corresponding
520.61.05 driver release. This can be achieved by installing
520.56.06 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
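The example command that follows "E.g.," sits outside this hunk's context. A minimal sketch of that install step, assuming a driver package named for this release (the exact `.run` filename is illustrative only):

```sh
# Install only the user-space driver components and GSP firmware from the
# packaged .run driver, skipping its prebuilt kernel modules so that the
# modules built from this source release are used instead.
sh ./NVIDIA-Linux-x86_64-520.56.06.run --no-kernel-modules
```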
@@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.61.05 release,
(see the table below). However, in the 520.56.06 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,

@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/520.61.05/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
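As a usage illustration (not part of this diff; the configuration file name below is an assumption), the parameter described above can be set persistently through modprobe:

```sh
# Opt GeForce and Workstation GPUs into the open kernel modules by setting
# the nvidia.ko module parameter documented in the README.
echo "options nvidia NVreg_OpenRmEnableUnsupportedGpus=1" | sudo tee /etc/modprobe.d/nvidia-open.conf
```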
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI

@@ -685,6 +685,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |

@@ -709,6 +710,7 @@ Subsystem Device ID.
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |

@@ -736,6 +738,7 @@ Subsystem Device ID.
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |

@@ -751,6 +754,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.61.05\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"

EXTRA_CFLAGS += -Wno-unused-function
@@ -78,8 +78,13 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)

#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#if defined(NV_MT_DEVICE_GRE_PRESENT)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_GRE))
#else
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
@@ -636,33 +636,27 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)

static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))

static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))

static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))

static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))

#define NV_RM_MAX_MSIX_LINES 8
@@ -2334,6 +2334,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
;;

mt_device_gre)
#
# Determine if MT_DEVICE_GRE flag is present.
#
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
# (2021-06-01).
#
CODE="
#include <asm/memory.h>
unsigned int conftest_mt_device_gre(void) {
return MT_DEVICE_GRE;
}"

compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
;;

get_user_pages)
#
# Conftest for get_user_pages()
@@ -431,7 +431,7 @@ static int nvidia_mmap_numa(
const nv_alloc_mapping_context_t *mmap_context)
{
NvU64 start, addr;
NvU64 pages;
unsigned int pages;
NvU64 i;

pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT;
@@ -1429,9 +1429,6 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -ENODEV;
}

if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
return -EMFILE;

if ( ! (nv->flags & NV_FLAG_OPEN))
{
/* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */
@@ -243,6 +243,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vmalloc_has_pgprot_t_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_channel_state
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_dev_has_ats_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += mt_device_gre
NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -207,6 +207,8 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp)

nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n");

WARN_ON(private == NULL);

mutex_lock(&nvlink_drvctx.lock);

if (private->capability_fds.fabric_mgmt > 0)
@@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r521_82
#define NV_BUILD_BRANCH r521_90
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r521_82
#define NV_PUBLIC_BRANCH r521_90
#endif

#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r520/r521_82-338"
#define NV_BUILD_CHANGELIST_NUM (31864828)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r520/r521_90-315"
#define NV_BUILD_CHANGELIST_NUM (31900380)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r520/r521_82-338"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31864828)
#define NV_BUILD_NAME "rel/gpu_drv/r520/r521_90-315"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31900380)

#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r521_82-4"
#define NV_BUILD_CHANGELIST_NUM (31858738)
#define NV_BUILD_BRANCH_VERSION "r521_90-15"
#define NV_BUILD_CHANGELIST_NUM (31900380)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "522.06"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31858738)
#define NV_BUILD_NAME "522.25"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31900380)
#define NV_BUILD_BRANCH_BASE_VERSION R520
#endif
// End buildmeister python edited section
@@ -136,6 +136,7 @@ static const PNPVendorId PNPVendorIds[] =
{ "CLO", _VENDOR_NAME_ENTRY("Clone Computers/Analogy") },
{ "CLT", _VENDOR_NAME_ENTRY("automated computer control systems")},
{ "CMD", _VENDOR_NAME_ENTRY("CMD Technology") },
{ "CMN", _VENDOR_NAME_ENTRY("Chimei innolux corp.") },
{ "CMO", _VENDOR_NAME_ENTRY("Chi Mei Optoelectronics corp.") },
{ "CNI", _VENDOR_NAME_ENTRY("Connect International") },
{ "CNT", _VENDOR_NAME_ENTRY("CNet Technology") },
@@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

#define NV_VERSION_STRING "520.61.05"
#define NV_VERSION_STRING "520.56.06"

#else
@@ -89,7 +89,7 @@ typedef struct
typedef struct
{
NvU16 nodeId;
NvU16 linkIndex;
NvU32 linkIndex;
nvlink_pci_dev_info pciInfo;
} nvlink_endpoint;

@@ -117,7 +117,7 @@ typedef struct
typedef struct
{
NvU16 nodeId;
NvU16 linkIndex;
NvU32 linkIndex;
nvlink_pci_dev_info pciInfo;
NvU8 devUuid[NVLINK_UUID_LEN];
NvU32 devType;

@@ -189,9 +189,9 @@ typedef enum
/* link and sublink state of an nvlink endpoint */
typedef struct
{
NvU8 linkMode;
NvU8 txSubLinkMode;
NvU8 rxSubLinkMode;
NvU32 linkMode;
NvU32 txSubLinkMode;
NvU32 rxSubLinkMode;
} nvlink_link_state;

/*

@@ -354,7 +354,7 @@ typedef struct
*/
typedef struct
{
NvU16 linkIndex;
NvU32 linkIndex;
NvBool initStatus;
} nvlink_link_init_status;

@@ -503,7 +503,7 @@ typedef struct
*/
typedef struct
{
NvU16 linkIndex;
NvU32 linkIndex;
NV_DECLARE_ALIGNED(NvU64 tokenValue, 8);
} nvlink_token_info;

@@ -1111,11 +1111,6 @@ typedef struct
NvU32 endStatesCount;
} nvlink_get_device_link_states;

/*
* Note: Verify that new parameter structs for IOCTLs satisfy
* sizing restrictions for all OSs they could be used in.
*/

#define CTRL_NVLINK_CHECK_VERSION 0x01
#define CTRL_NVLINK_SET_NODE_ID 0x02
#define CTRL_NVLINK_SET_TX_COMMON_MODE 0x03
@@ -78,18 +78,7 @@ nvlink_core_check_link_state
return NV_FALSE;
}

status = link->link_handlers->get_dl_link_mode(link, &crntDlLinkMode);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to get DL link mode for %s:%s\n",
__FUNCTION__, link->dev->deviceName, link->linkName));
return NV_FALSE;
}

if (crntTlLinkMode == NVLINK_LINKSTATE_HS &&
(crntDlLinkMode == NVLINK_LINKSTATE_HS ||
crntDlLinkMode == NVLINK_LINKSTATE_SLEEP))
if (crntTlLinkMode == NVLINK_LINKSTATE_HS)
{
return NV_TRUE;
}
@@ -430,7 +430,7 @@ nvlink_core_powerdown_intranode_conns_from_active_to_off

// to track Failure
conns[i]->end0->inSWCFG = NV_FALSE;
}
}
else
{
conns[i]->end0->inSWCFG = NV_TRUE;

@@ -448,14 +448,14 @@ nvlink_core_powerdown_intranode_conns_from_active_to_off

// to track Failure
conns[i]->end1->inSWCFG = NV_FALSE;
}
}
else
{
conns[i]->end1->inSWCFG = NV_TRUE;
}

// Change each sublink state to SAFE
if(conns[i]->end0->inSWCFG == NV_TRUE)
if(conns[i]->end0->inSWCFG == NV_TRUE)
{
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
NVLINK_SUBLINK_STATE_TX_SAFE,

@@ -928,8 +928,7 @@ nvlink_core_powerdown_floorswept_conns_to_off
{
nvlink_core_get_intranode_conn(links[j], &(conn));
if (conn == NULL ||
_nvlink_core_check_if_conn_in_array(visitedConns, connCount, conn) ||
(conn->end0 == NULL || conn->end1 == NULL))
_nvlink_core_check_if_conn_in_array(visitedConns, connCount, conn))
{
continue;
}
@@ -535,7 +535,6 @@ void nvswitch_reset_persistent_link_hw_state(nvswitch_device *device, NvU32
void nvswitch_store_topology_information(nvswitch_device *device, nvlink_link *link);

NvlStatus nvswitch_launch_ALI(nvswitch_device *device);
NvlStatus nvswitch_launch_ALI_link_training(nvswitch_device *device, nvlink_link *link, NvBool bSync);
NvlStatus nvswitch_inband_read_data(nvswitch_device *device, NvU8 *dest, NvU32 linkId, NvU32 *dataSize);
void nvswitch_filter_messages(nvswitch_device *device, NvU32 linkId);
NvlStatus nvswitch_set_training_mode(nvswitch_device *device);
@@ -210,7 +210,7 @@

#define NVSWITCH_HAL_FUNCTION_LIST_LS10(_op, _arch) \
_op(NvlStatus, nvswitch_launch_ALI, (nvswitch_device *device), _arch) \
_op(NvlStatus, nvswitch_launch_ALI_link_training, (nvswitch_device *device, nvlink_link *link, NvBool bSync), _arch) \
_op(NvlStatus, nvswitch_launch_ALI_link_training, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(NvlStatus, nvswitch_ctrl_inband_send_data, (nvswitch_device *device, NVSWITCH_INBAND_SEND_DATA_PARAMS *p), _arch) \
_op(NvlStatus, nvswitch_ctrl_inband_read_data, (nvswitch_device *device, NVSWITCH_INBAND_READ_DATA_PARAMS *p), _arch) \
_op(NvlStatus, nvswitch_ctrl_set_residency_bins, (nvswitch_device *device, NVSWITCH_SET_RESIDENCY_BINS *p), _arch) \
@@ -648,7 +648,7 @@ NvlStatus nvswitch_ctrl_get_nvlink_lp_counters_lr10(nvswitch_device *device, NVS
NvlStatus nvswitch_service_nvldl_fatal_link_lr10(nvswitch_device *device, NvU32 nvliptInstance, NvU32 link);
NvlStatus nvswitch_ctrl_inband_send_data_lr10(nvswitch_device *device, NVSWITCH_INBAND_SEND_DATA_PARAMS *p);
NvlStatus nvswitch_ctrl_inband_read_data_lr10(nvswitch_device *device, NVSWITCH_INBAND_READ_DATA_PARAMS *p);
NvlStatus nvswitch_launch_ALI_link_training_lr10(nvswitch_device *device, nvlink_link *link, NvBool bSync);
NvlStatus nvswitch_launch_ALI_link_training_lr10(nvswitch_device *device, nvlink_link *link);
NvlStatus nvswitch_service_minion_link_lr10(nvswitch_device *device, NvU32 nvliptInstance);
void nvswitch_apply_recal_settings_lr10(nvswitch_device *device, nvlink_link *link);
NvlStatus nvswitch_ctrl_get_sw_info_lr10(nvswitch_device *device, NVSWITCH_GET_SW_INFO_PARAMS *p);
@@ -2047,8 +2047,7 @@ NvlStatus
nvswitch_launch_ALI_link_training_lr10
(
nvswitch_device *device,
nvlink_link *link,
NvBool bSync
nvlink_link *link
)
{
return NVL_ERR_NOT_IMPLEMENTED;
@@ -307,7 +307,7 @@ _nvswitch_corelib_ali_training
)
{
nvswitch_device *device = link->dev->pDevInfo;
return device->hal.nvswitch_launch_ALI_link_training(device, link, NV_FALSE);
return device->hal.nvswitch_launch_ALI_link_training(device, link);
}

void

@@ -4191,11 +4191,10 @@ NvlStatus
nvswitch_launch_ALI_link_training
(
nvswitch_device *device,
nvlink_link *link,
NvBool bSync
nvlink_link *link
)
{
return device->hal.nvswitch_launch_ALI_link_training(device, link, bSync);
return device->hal.nvswitch_launch_ALI_link_training(device, link);
}

NvlStatus
@@ -583,25 +583,4 @@ typedef struct NV208F_CTRL_FB_CLEAR_REMAPPED_ROWS_PARAMS {
NvU32 sourceMask;
} NV208F_CTRL_FB_CLEAR_REMAPPED_ROWS_PARAMS;

/*
* NV208F_CTRL_CMD_FB_GET_FLOORSWEPT_FBPA_MASK
*
* This command calculates the floorswept fbpa mask by taking 1/2 HBM
* floorsweeping into account
*
* fbpaMask
* This value of the mask.
*
* Possbile status values returned are:
* NV_OK
* NV_ERR_NOT_SUPPORTED
*/
#define NV208F_CTRL_CMD_FB_GET_FLOORSWEPT_FBPA_MASK (0x208f0516) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_FB_INTERFACE_ID << 8) | NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS_MESSAGE_ID" */

#define NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS_MESSAGE_ID (0x16U)

typedef struct NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS {
NvU32 fbpaMask;
} NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS;

/* _ctrl208ffb_h_ */
@ -5200,11 +5200,13 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
|
||||
const NVHwModeViewPortEvo *pViewPortMin,
|
||||
const NVHwModeViewPortEvo *pViewPort,
|
||||
const NVHwModeViewPortEvo *pViewPortMax,
|
||||
NVEvoUpdateState *updateState)
|
||||
NVEvoUpdateState *updateState,
|
||||
NvU32 setWindowUsageBounds)
|
||||
{
|
||||
const NVEvoCapabilitiesPtr pEvoCaps = &pDevEvo->gpus[0].capabilities;
|
||||
NVEvoChannelPtr pChannel = pDevEvo->core;
|
||||
struct NvKmsScalingUsageBounds scalingUsageBounds = { };
|
||||
NvU32 win;
|
||||
|
||||
/* These methods should only apply to a single pDpy */
|
||||
nvAssert(pDevEvo->subDevMaskStackDepth > 0);
|
||||
@ -5250,6 +5252,35 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
|
||||
DRF_NUM(C37D, _HEAD_SET_MAX_OUTPUT_SCALE_FACTOR, _VERTICAL,
|
||||
scalingUsageBounds.maxVDownscaleFactor));
|
||||
|
||||
/*
|
||||
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
|
||||
* for each window that is attached to the head.
|
||||
*
|
||||
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
|
||||
* this cropped size back to the input surface domain, and isohub will fetch
|
||||
* this cropped size. This function assumes that there's no window scaling yet,
|
||||
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
|
||||
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
|
||||
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
|
||||
*
|
||||
* Program MAX_PIXELS_FETCHED_PER_LINE for each window that is attached to
|
||||
* head. For Turing+, SetScalingUsageBoundsOneWindow5() will take care of
|
||||
* programming window usage bounds only for the layers/windows in use.
|
||||
*/
|
||||
setWindowUsageBounds |=
|
||||
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
|
||||
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
|
||||
NV_EVO_SCALE_FACTOR_1X));
|
||||
|
||||
for (win = 0; win < pDevEvo->numWindows; win++) {
|
||||
if (head != pDevEvo->headForWindow[win]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1);
|
||||
nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds);
|
||||
}
|
||||
|
||||
return scalingUsageBounds.vUpscalingAllowed;
|
||||
}
|
||||
|
||||
@ -5260,11 +5291,10 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
|
||||
NVEvoUpdateState *updateState)
|
||||
{
|
||||
NVEvoChannelPtr pChannel = pDevEvo->core;
|
||||
NvU32 win;
|
||||
NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3;
|
||||
NvBool verticalUpscalingAllowed =
|
||||
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
|
||||
pViewPortMax, updateState);
|
||||
pViewPortMax, updateState,
|
||||
NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3);
|
||||
|
||||
nvDmaSetStartEvoMethod(pChannel,
|
||||
NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);
|
||||
@ -5274,34 +5304,6 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
|
||||
(verticalUpscalingAllowed ?
|
||||
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) :
|
||||
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)));
|
||||
/*
|
||||
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
|
||||
* for each window that is attached to the head.
|
||||
*
|
||||
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
|
||||
* this cropped size back to the input surface domain, and isohub will fetch
|
||||
* this cropped size. This function assumes that there's no window scaling yet,
|
||||
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
|
||||
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
|
||||
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
|
||||
* On Volta, Program for each window that is attached to head. For turing+,
|
||||
* SetScalingUsageBoundsOneWindow5() will take care of programming window
|
||||
* usage bounds only for the layers/windows in use.
|
||||
*/
|
||||
|
||||
setWindowUsageBounds |=
|
||||
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
|
||||
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
|
||||
NV_EVO_SCALE_FACTOR_1X));
|
||||
|
||||
for (win = 0; win < pDevEvo->numWindows; win++) {
|
||||
if (head != pDevEvo->headForWindow[win]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1);
|
||||
nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds);
|
||||
}
|
||||
}
|
||||
|
||||
static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
|
||||
@ -5311,9 +5313,13 @@ static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
|
||||
NVEvoUpdateState *updateState)
|
||||
{
|
||||
NVEvoChannelPtr pChannel = pDevEvo->core;
|
||||
NvU32 setWindowUsageBounds =
|
||||
(NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5 |
|
||||
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2) |
|
||||
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE));
|
||||
NvU32 verticalUpscalingAllowed =
|
||||
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
|
||||
pViewPortMax, updateState);
|
||||
pViewPortMax, updateState, setWindowUsageBounds);
|
||||
|
||||
nvDmaSetStartEvoMethod(pChannel,
|
||||
NVC57D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);
|
||||
|
@ -636,33 +636,27 @@ typedef enum
|
||||
#define NV_GET_NV_STATE(pGpu) \
|
||||
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
|
||||
|
||||
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((offset >= nv->regs->cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
|
||||
}
|
||||
#define IS_REG_OFFSET(nv, offset, length) \
|
||||
(((offset) >= (nv)->regs->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= \
|
||||
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
|
||||
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
#define IS_FB_OFFSET(nv, offset, length) \
|
||||
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
|
||||
|
||||
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
|
||||
(offset >= nv->ud.cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
|
||||
}
|
||||
#define IS_UD_OFFSET(nv, offset, length) \
|
||||
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
|
||||
((offset) >= (nv)->ud.cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
|
||||
|
||||
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
|
||||
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
|
||||
}
|
||||
#define IS_IMEM_OFFSET(nv, offset, length) \
|
||||
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
|
||||
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
|
||||
(((offset) + ((length) - 1)) <= \
|
||||
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
|
||||
|
||||
#define NV_RM_MAX_MSIX_LINES 8
|
||||
|
||||
|
@ -780,8 +780,10 @@ static NV_STATUS RmAccessRegistry(
|
||||
RmStatus = NV_ERR_INVALID_STRING_LENGTH;
|
||||
goto done;
|
||||
}
|
||||
|
||||
// get access to client's parmStr
|
||||
RMAPI_PARAM_COPY_INIT(parmStrParamCopy, tmpParmStr, clientParmStrAddress, ParmStrLength, 1);
|
||||
parmStrParamCopy.flags |= RMAPI_PARAM_COPY_FLAGS_ZERO_BUFFER;
|
||||
RmStatus = rmapiParamsAcquire(&parmStrParamCopy, NV_TRUE);
|
||||
if (RmStatus != NV_OK)
|
||||
{
|
||||
@ -2027,7 +2029,6 @@ static NV_STATUS RmGetAllocPrivate(
|
||||
PMEMORY_DESCRIPTOR pMemDesc;
|
||||
NvU32 pageOffset;
|
||||
NvU64 pageCount;
|
||||
NvU64 endingOffset;
|
||||
RsResourceRef *pResourceRef;
|
||||
RmResource *pRmResource;
|
||||
void *pMemData;
|
||||
@ -2088,9 +2089,8 @@ static NV_STATUS RmGetAllocPrivate(
|
||||
if (rmStatus != NV_OK)
|
||||
goto done;
|
||||
|
||||
endingOffset = pageOffset + length;
|
||||
pageCount = (endingOffset / os_page_size);
|
||||
pageCount += (*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0));
|
||||
pageCount = ((pageOffset + length) / os_page_size);
|
||||
pageCount += (*pPageIndex + (((pageOffset + length) % os_page_size) ? 1 : 0));
|
||||
|
||||
if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount))
|
||||
{
|
||||
|
@ -638,15 +638,6 @@ osInitNvMapping(
|
||||
sysApplyLockingPolicy(pSys);
|
||||
|
||||
pGpu->busInfo.IntLine = nv->interrupt_line;
|
||||
|
||||
//
|
||||
// Set the DMA address size as soon as we have the HAL to call to
|
||||
// determine the precise number of physical address bits supported
|
||||
// by the architecture. DMA allocations should not be made before
|
||||
// this point.
|
||||
//
|
||||
nv_set_dma_address_size(nv, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM));
|
||||
|
||||
pGpu->dmaStartAddress = (RmPhysAddr)nv_get_dma_start_address(nv);
|
||||
if (nv->fb != NULL)
|
||||
{
|
||||
@ -735,6 +726,15 @@ osTeardownScalability(
|
||||
return clTeardownPcie(pGpu, pCl);
|
||||
}
|
||||
|
||||
static inline void
|
||||
RmSetDeviceDmaAddressSize(
|
||||
nv_state_t *nv,
|
||||
NvU8 numDmaAddressBits
|
||||
)
|
||||
{
|
||||
nv_set_dma_address_size(nv, numDmaAddressBits);
|
||||
}
|
||||
|
||||
static void
|
||||
populateDeviceAttributes(
|
||||
OBJGPU *pGpu,
|
||||
@ -884,6 +884,8 @@ RmInitNvDevice(
|
||||
return;
|
||||
}
|
||||
|
||||
RmSetDeviceDmaAddressSize(nv, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM));
|
||||
|
||||
os_disable_console_access();
|
||||
|
||||
status->rmStatus = gpumgrStateInitGpu(pGpu);
|
||||
@ -1187,7 +1189,7 @@ NvBool RmInitPrivateState(
|
||||
// Set up a reasonable default DMA address size, based on the minimum
|
||||
// possible on currently supported GPUs.
|
||||
//
|
||||
nv_set_dma_address_size(pNv, NV_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
|
||||
RmSetDeviceDmaAddressSize(pNv, NV_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
|
||||
|
||||
os_mem_set(nvp, 0, sizeof(*nvp));
|
||||
nvp->status = NV_ERR_INVALID_STATE;
|
||||
@ -1581,7 +1583,7 @@ NvBool RmInitAdapter(
|
||||
//
|
||||
if (nv->request_firmware)
|
||||
{
|
||||
nv_set_dma_address_size(nv, NV_GSP_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
|
||||
RmSetDeviceDmaAddressSize(nv, NV_GSP_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
|
||||
|
||||
gspFwHandle = nv_get_firmware(nv, NV_FIRMWARE_GSP,
|
||||
&gspFw.pBuf,
|
||||
|
@ -214,17 +214,6 @@ void __nvoc_init_dataField_KernelNvlink(KernelNvlink *pThis, RmHalspecOwner *pRm
|
||||
pThis->setProperty(pThis, PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_REFCNT, ((NvBool)(0 == 0)));
|
||||
}
|
||||
|
||||
// NVOC Property Hal field -- PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x08000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->setProperty(pThis, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK, ((NvBool)(0 == 0)));
|
||||
}
|
||||
// default
|
||||
else
|
||||
{
|
||||
pThis->setProperty(pThis, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK, ((NvBool)(0 != 0)));
|
||||
}
|
||||
|
||||
// NVOC Property Hal field -- PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x0870fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | GH100 */
|
||||
{
|
||||
@ -646,22 +635,6 @@ static void __nvoc_init_funcTable_KernelNvlink_1(KernelNvlink *pThis, RmHalspecO
|
||||
}
|
||||
}
|
||||
|
||||
// Hal function -- knvlinkIsFloorSweepingNeeded
|
||||
if (0)
|
||||
{
|
||||
}
|
||||
else if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* RmVariantHal: PF_KERNEL_ONLY */
|
||||
{
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x08000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->__knvlinkIsFloorSweepingNeeded__ = &knvlinkIsFloorSweepingNeeded_GH100;
|
||||
}
|
||||
else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x0070ffe0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 */
|
||||
{
|
||||
pThis->__knvlinkIsFloorSweepingNeeded__ = &knvlinkIsFloorSweepingNeeded_491d52;
|
||||
}
|
||||
}
|
||||
|
||||
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelNvlink_engstateConstructEngine;
|
||||
|
||||
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelNvlink_engstateStatePreInitLocked;
|
||||
|
@ -218,7 +218,6 @@ struct KernelNvlink {
|
||||
NV_STATUS (*__knvlinkIsAliSupported__)(OBJGPU *, struct KernelNvlink *);
|
||||
NV_STATUS (*__knvlinkPostSetupNvlinkPeer__)(OBJGPU *, struct KernelNvlink *);
|
||||
NV_STATUS (*__knvlinkDiscoverPostRxDetLinks__)(OBJGPU *, struct KernelNvlink *, OBJGPU *);
|
||||
NvBool (*__knvlinkIsFloorSweepingNeeded__)(OBJGPU *, struct KernelNvlink *, NvU32, NvU32);
|
||||
NV_STATUS (*__knvlinkReconcileTunableState__)(POBJGPU, struct KernelNvlink *, void *);
|
||||
NV_STATUS (*__knvlinkStateInitLocked__)(POBJGPU, struct KernelNvlink *);
|
||||
NV_STATUS (*__knvlinkStatePreLoad__)(POBJGPU, struct KernelNvlink *, NvU32);
|
||||
@ -237,7 +236,6 @@ struct KernelNvlink {
|
||||
NvBool PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED;
|
||||
NvBool PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_SUPPORTED;
|
||||
NvBool PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_REFCNT;
|
||||
NvBool PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK;
|
||||
NvBool PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING;
|
||||
NvBool PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED;
|
||||
NvBool PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD;
|
||||
@ -318,16 +316,12 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelNvlink;
|
||||
((KernelNvlink*)__nvoc_dynamicCast(staticCast((pThis), Dynamic), classInfo(KernelNvlink)))
|
||||
#endif //__nvoc_kernel_nvlink_h_disabled
|
||||
|
||||
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_NAME PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING
|
||||
#define PDB_PROP_KNVLINK_MINION_GFW_BOOT_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_MINION_GFW_BOOT_BASE_NAME PDB_PROP_KNVLINK_MINION_GFW_BOOT
|
||||
#define PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK_BASE_NAME PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED
|
||||
#define PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED_BASE_NAME PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED
|
||||
#define PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING_BASE_NAME PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING
|
||||
#define PDB_PROP_KNVLINK_ENABLED_BASE_CAST
|
||||
@ -342,10 +336,12 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelNvlink;
|
||||
#define PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED_BASE_NAME PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED
|
||||
#define PDB_PROP_KNVLINK_IS_MISSING_BASE_CAST __nvoc_base_OBJENGSTATE.
|
||||
#define PDB_PROP_KNVLINK_IS_MISSING_BASE_NAME PDB_PROP_ENGSTATE_IS_MISSING
|
||||
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_NAME PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6
|
||||
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_NAME PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING
|
||||
#define PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED_BASE_NAME PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED
|
||||
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_NAME PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD_BASE_CAST
|
||||
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD
|
||||
#define PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING_BASE_CAST
|
||||
@ -400,8 +396,6 @@ NV_STATUS __nvoc_objCreate_KernelNvlink(KernelNvlink**, Dynamic*, NvU32);
|
||||
#define knvlinkPostSetupNvlinkPeer_HAL(pGpu, pKernelNvlink) knvlinkPostSetupNvlinkPeer_DISPATCH(pGpu, pKernelNvlink)
|
||||
#define knvlinkDiscoverPostRxDetLinks(pGpu, pKernelNvlink, pPeerGpu) knvlinkDiscoverPostRxDetLinks_DISPATCH(pGpu, pKernelNvlink, pPeerGpu)
|
||||
#define knvlinkDiscoverPostRxDetLinks_HAL(pGpu, pKernelNvlink, pPeerGpu) knvlinkDiscoverPostRxDetLinks_DISPATCH(pGpu, pKernelNvlink, pPeerGpu)
|
||||
#define knvlinkIsFloorSweepingNeeded(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl) knvlinkIsFloorSweepingNeeded_DISPATCH(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl)
|
||||
#define knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl) knvlinkIsFloorSweepingNeeded_DISPATCH(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl)
|
||||
#define knvlinkReconcileTunableState(pGpu, pEngstate, pTunableState) knvlinkReconcileTunableState_DISPATCH(pGpu, pEngstate, pTunableState)
|
||||
#define knvlinkStateInitLocked(pGpu, pEngstate) knvlinkStateInitLocked_DISPATCH(pGpu, pEngstate)
|
||||
#define knvlinkStatePreLoad(pGpu, pEngstate, arg0) knvlinkStatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
|
||||
@ -1074,19 +1068,6 @@ static inline NV_STATUS knvlinkSetUniqueFlaBaseAddress(OBJGPU *pGpu, struct Kern
|
||||
|
||||
#define knvlinkSetUniqueFlaBaseAddress_HAL(pGpu, pKernelNvlink, arg0) knvlinkSetUniqueFlaBaseAddress(pGpu, pKernelNvlink, arg0)
|
||||
|
||||
NV_STATUS knvlinkFloorSweep_IMPL(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numLinksPerIp, NvU32 *pNumActiveLinks);
|
||||
|
||||
#ifdef __nvoc_kernel_nvlink_h_disabled
|
||||
static inline NV_STATUS knvlinkFloorSweep(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numLinksPerIp, NvU32 *pNumActiveLinks) {
|
||||
NV_ASSERT_FAILED_PRECOMP("KernelNvlink was disabled!");
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
#else //__nvoc_kernel_nvlink_h_disabled
|
||||
#define knvlinkFloorSweep(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks) knvlinkFloorSweep_IMPL(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks)
|
||||
#endif //__nvoc_kernel_nvlink_h_disabled
|
||||
|
||||
#define knvlinkFloorSweep_HAL(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks) knvlinkFloorSweep(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks)
|
||||
|
||||
static inline NvU64 knvlinkGetUniqueFabricBaseAddress_72249a(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink) {
|
||||
return pKernelNvlink->fabricBaseAddr;
|
||||
}
|
||||
@ -1391,16 +1372,6 @@ static inline NV_STATUS knvlinkDiscoverPostRxDetLinks_DISPATCH(OBJGPU *pGpu, str
|
||||
return pKernelNvlink->__knvlinkDiscoverPostRxDetLinks__(pGpu, pKernelNvlink, pPeerGpu);
|
||||
}
|
||||
|
||||
static inline NvBool knvlinkIsFloorSweepingNeeded_491d52(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl) {
|
||||
return ((NvBool)(0 != 0));
|
||||
}
|
||||
|
||||
NvBool knvlinkIsFloorSweepingNeeded_GH100(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl);
|
||||
|
||||
static inline NvBool knvlinkIsFloorSweepingNeeded_DISPATCH(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl) {
|
||||
return pKernelNvlink->__knvlinkIsFloorSweepingNeeded__(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl);
|
||||
}
|
||||
|
||||
static inline NV_STATUS knvlinkReconcileTunableState_DISPATCH(POBJGPU pGpu, struct KernelNvlink *pEngstate, void *pTunableState) {
|
||||
return pEngstate->__knvlinkReconcileTunableState__(pGpu, pEngstate, pTunableState);
|
||||
}
|
||||
|
@ -1077,7 +1077,6 @@ void memdescUnmapInternal(OBJGPU *pGpu, MEMORY_DESCRIPTOR *pMemDesc, NvU32 flags
|
||||
// currently for this, so a WAR is required for r515. The intent
|
||||
// is to remove this by r525.
|
||||
//
|
||||
#define MEMDESC_FLAGS_WSL_SHARED_MEMORY NVBIT64(46)
|
||||
|
||||
#endif // _MEMDESC_H_
|
||||
|
||||
|
@ -849,6 +849,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x2203, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090 Ti" },
|
||||
{ 0x2204, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090" },
|
||||
{ 0x2206, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
|
||||
{ 0x2207, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti" },
|
||||
{ 0x2208, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti" },
|
||||
{ 0x220A, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
|
||||
{ 0x220D, 0x0000, 0x0000, "NVIDIA CMP 90HX" },
|
||||
@ -873,6 +874,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x2236, 0x1482, 0x10de, "NVIDIA A10" },
|
||||
{ 0x2237, 0x152f, 0x10de, "NVIDIA A10G" },
|
||||
{ 0x2238, 0x1677, 0x10de, "NVIDIA A10M" },
|
||||
{ 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
|
||||
{ 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
|
||||
{ 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
|
||||
{ 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },
|
||||
@ -900,6 +902,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x24B9, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
|
||||
{ 0x24BA, 0x0000, 0x0000, "NVIDIA RTX A4500 Laptop GPU" },
|
||||
{ 0x24BB, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
|
||||
{ 0x24C9, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
|
||||
{ 0x24DC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Laptop GPU" },
|
||||
{ 0x24DD, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Laptop GPU" },
|
||||
{ 0x24E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti Laptop GPU" },
|
||||
@ -915,6 +918,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" },
|
||||
{ 0x2531, 0x151d, 0x10de, "NVIDIA RTX A2000" },
|
||||
{ 0x2531, 0x151d, 0x17aa, "NVIDIA RTX A2000" },
|
||||
{ 0x2544, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060" },
|
||||
{ 0x2560, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" },
|
||||
{ 0x2563, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" },
|
||||
{ 0x2571, 0x1611, 0x1028, "NVIDIA RTX A2000 12GB" },
|
||||
@ -944,6 +948,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x25E5, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Laptop GPU" },
|
||||
{ 0x25F9, 0x0000, 0x0000, "NVIDIA RTX A1000 Embedded GPU" },
|
||||
{ 0x25FA, 0x0000, 0x0000, "NVIDIA RTX A2000 Embedded GPU" },
|
||||
{ 0x2684, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090" },
|
||||
{ 0x13BD, 0x11cc, 0x10DE, "GRID M10-0B" },
|
||||
{ 0x13BD, 0x11cd, 0x10DE, "GRID M10-1B" },
|
||||
{ 0x13BD, 0x11ce, 0x10DE, "GRID M10-0Q" },
|
||||
|
@ -309,7 +309,6 @@ kbusRemoveNvlinkPeerMapping_GP100
|
||||
NvU32 peerGpuInst = gpuGetInstance(pGpu1);
|
||||
KernelNvlink *pKernelNvlink0 = GPU_GET_KERNEL_NVLINK(pGpu0);
|
||||
NvBool bLoopback = (pGpu0 == pGpu1);
|
||||
NvBool bBufferReady = NV_FALSE;
|
||||
|
||||
NV_ASSERT_OR_RETURN(pKernelNvlink0 != NULL, NV_ERR_NOT_SUPPORTED);
|
||||
|
||||
@ -416,23 +415,8 @@ kbusRemoveNvlinkPeerMapping_GP100
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Call knvlinkUpdateCurrentConfig to flush settings to the registers
|
||||
// Skip this call if buffer ready is set and CONFIG_REQUIRE_INITIALIZED is true
|
||||
//
|
||||
status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu0, pKernelNvlink0);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_ASSERT(status == NV_OK);
|
||||
return status;
|
||||
}
|
||||
|
||||
bBufferReady = ((pKernelNvlink0->initializedLinks & pKernelNvlink0->peerLinkMasks[peerId]) != 0) ? NV_TRUE : NV_FALSE;
|
||||
if (!pKernelNvlink0->getProperty(pKernelNvlink0, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK) ||
|
||||
!bBufferReady)
|
||||
{
|
||||
status = knvlinkUpdateCurrentConfig(pGpu0, pKernelNvlink0);
|
||||
}
|
||||
status = knvlinkUpdateCurrentConfig(pGpu0, pKernelNvlink0);
|
||||
}
|
||||
|
||||
return status;
|
||||
|
@ -3756,7 +3756,6 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
|
||||
Memory *pMemory;
|
||||
ContextDma *pContextDma;
|
||||
NvU32 addressSpace;
|
||||
NvU64 notificationBufferSize;
|
||||
NV_STATUS status;
|
||||
|
||||
hNotifier = pKernelChannel->hErrorContext;
|
||||
@ -3765,8 +3764,6 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR,
|
||||
NV_ERR_INVALID_ARGUMENT);
|
||||
|
||||
notificationBufferSize = (index + 1) * sizeof(NvNotification);
|
||||
|
||||
status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice);
|
||||
if (status != NV_OK)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
@ -3775,7 +3772,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
|
||||
{
|
||||
addressSpace = memdescGetAddressSpace(pMemory->pMemDesc);
|
||||
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= notificationBufferSize,
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= ((index + 1) * sizeof(NvNotification)),
|
||||
NV_ERR_OUT_OF_RANGE);
|
||||
switch (addressSpace)
|
||||
{
|
||||
@ -3793,7 +3790,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
|
||||
&pDmaMappingInfo),
|
||||
NV_ERR_GENERIC);
|
||||
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= notificationBufferSize,
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= ((index + 1) * sizeof(NvNotification)),
|
||||
NV_ERR_OUT_OF_RANGE);
|
||||
break;
|
||||
}
|
||||
@ -3808,7 +3805,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
|
||||
}
|
||||
else if (NV_OK == ctxdmaGetByHandle(pClient, hNotifier, &pContextDma))
|
||||
{
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (notificationBufferSize - 1),
|
||||
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (((index + 1) * sizeof(NvNotification)) - 1),
|
||||
NV_ERR_OUT_OF_RANGE);
|
||||
}
|
||||
else
|
||||
|
@ -1927,7 +1927,6 @@ memmgrFillComprInfo_IMPL
|
||||
{
|
||||
const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
|
||||
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
|
||||
NvU32 size;
|
||||
|
||||
portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
|
||||
|
||||
@ -1938,12 +1937,10 @@ memmgrFillComprInfo_IMPL
|
||||
|
||||
NV_ASSERT(compTagStartOffset != ~(NvU32)0);
|
||||
|
||||
size = pageSize * pageCount;
|
||||
|
||||
pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
|
||||
pComprInfo->compTagLineMin = compTagStartOffset;
|
||||
pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
|
||||
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + size - 1) >> pComprInfo->compPageShift);
|
||||
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
|
||||
pComprInfo->compTagLineMultiplier = 1;
|
||||
|
||||
return NV_OK;
|
||||
|
@ -132,8 +132,6 @@ knvlinkRemoveMapping_GA100
|
||||
)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 peerId;
|
||||
NvBool bBufferReady = NV_FALSE;
|
||||
|
||||
NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_PARAMS params;
|
||||
portMemSet(¶ms, 0, sizeof(params));
|
||||
@ -163,35 +161,7 @@ knvlinkRemoveMapping_GA100
|
||||
// the MUX registers and the connection config registers. So, we have
|
||||
// to call nvlinkCurrentConfig instead of nvlinkUpdateHshubConfigRegs
|
||||
//
|
||||
status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_ASSERT(status == NV_OK);
|
||||
return status;
|
||||
}
|
||||
|
||||
if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK))
|
||||
{
|
||||
FOR_EACH_INDEX_IN_MASK(32, peerId, peerMask)
|
||||
{
|
||||
if (pKernelNvlink->initializedLinks & pKernelNvlink->peerLinkMasks[peerId])
|
||||
{
|
||||
bBufferReady = NV_TRUE;
|
||||
break;
|
||||
}
|
||||
} FOR_EACH_INDEX_IN_MASK_END;
|
||||
|
||||
if (!bBufferReady)
|
||||
{
|
||||
status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
|
||||
}
|
||||
|
||||
return status;
|
||||
return knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
|
||||
}
|
||||
|
||||
/*!
|
||||
|
@ -248,49 +248,3 @@ knvlinkDiscoverPostRxDetLinks_GH100
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Check if floorsweeping is needed for this particular chip
|
||||
*
|
||||
* @param[in] pGpu OBJGPU pointer
|
||||
* @param[in] pKernelNvlink KernelNvlink pointer
|
||||
*
|
||||
* @returns On success, sets unique fabric address and returns NV_OK.
|
||||
* On failure, returns NV_ERR_XXX.
|
||||
*/
|
||||
NvBool
|
||||
knvlinkIsFloorSweepingNeeded_GH100
|
||||
(
|
||||
OBJGPU *pGpu,
|
||||
KernelNvlink *pKernelNvlink,
|
||||
NvU32 numActiveLinksPerIoctrl,
|
||||
NvU32 numLinksPerIoctrl
|
||||
)
|
||||
{
|
||||
|
||||
//
|
||||
// Only floorsweep down the given GPU if the following conditions are met:
|
||||
// 1. if the number of links for the IP is > 0
|
||||
//
|
||||
// 2. The number of active links allowed for the IOCTRL is less then the
|
||||
// total number of links for the IOCTRL. No reason to spend time in code
|
||||
// if the exectution of it will be a NOP
|
||||
//
|
||||
// 3. If the GPU has never been floorswept. An optimization to make sure RM
|
||||
// doesn't burn cycles repeatedly running running code that will be a NOP
|
||||
//
|
||||
// 4. (temporary) Run only on Silicon chips. Fmodel currently doesn't support
|
||||
// this feature
|
||||
//
|
||||
|
||||
if ((numLinksPerIoctrl > 0 && numActiveLinksPerIoctrl > 0) &&
|
||||
numActiveLinksPerIoctrl < numLinksPerIoctrl &&
|
||||
!pKernelNvlink->bFloorSwept &&
|
||||
IS_SILICON(pGpu))
|
||||
{
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
|
@ -349,11 +349,7 @@ knvlinkGetP2pConnectionStatus_IMPL
|
||||
}
|
||||
|
||||
// Get the remote ends of the links of local GPU from the nvlink core
|
||||
status = knvlinkCoreGetRemoteDeviceInfo(pGpu0, pKernelNvlink0);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
knvlinkCoreGetRemoteDeviceInfo(pGpu0, pKernelNvlink0);
|
||||
|
||||
// Post topology link enable on links of local GPU
|
||||
status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0,
|
||||
@ -369,11 +365,7 @@ knvlinkGetP2pConnectionStatus_IMPL
|
||||
if (knvlinkGetNumLinksToPeer(pGpu1, pKernelNvlink1, pGpu0) != numPeerLinks)
|
||||
{
|
||||
// Get the remote ends of the links of remote GPU from the nvlink core
|
||||
status = knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
|
||||
|
||||
// Post topology link enable on links of remote GPU
|
||||
status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1,
|
||||
@ -492,12 +484,12 @@ knvlinkUpdateCurrentConfig_IMPL
|
||||
{
|
||||
pKCe = GPU_GET_KCE(pGpu, i);
|
||||
if (pKCe)
|
||||
{
|
||||
status = kceTopLevelPceLceMappingsUpdate(pGpu, pKCe);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Failed to update PCE-LCE mappings\n");
|
||||
}
|
||||
status = kceTopLevelPceLceMappingsUpdate(pGpu, pKCe);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Failed to update PCE-LCE mappings\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -815,8 +807,8 @@ knvlinkPrepareForXVEReset_IMPL
|
||||
|
||||
// Remove all NVLink mappings in HSHUB config registers to init values
|
||||
if (!API_GPU_IN_RESET_SANITY_CHECK(pGpu) && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST))
|
||||
status = knvlinkRemoveMapping_HAL(pGpu, pKernelNvlink, NV_TRUE, ((1 << NVLINK_MAX_PEERS_SW) - 1),
|
||||
NV_FALSE /* bL2Entry */);
|
||||
status = knvlinkRemoveMapping_HAL(pGpu, pKernelNvlink, NV_TRUE, ((1 << NVLINK_MAX_PEERS_SW) - 1),
|
||||
NV_FALSE /* bL2Entry */);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR,
|
||||
@ -1217,7 +1209,7 @@ knvlinkUpdateLinkConnectionStatus_IMPL
|
||||
|
||||
#if defined(INCLUDE_NVLINK_LIB)
|
||||
|
||||
params.bConnected = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected;
|
||||
params.bConnected = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected;
|
||||
params.remoteDeviceType = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType;
|
||||
params.remoteLinkNumber = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
|
||||
|
||||
|
@ -241,7 +241,6 @@ subdeviceCtrlCmdBusGetNvlinkStatus_IMPL
|
||||
NvU32 r = 0;
|
||||
NvBool bPeerLink, bSysmemLink, bSwitchLink;
|
||||
NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_INFO_PARAMS nvlinkLinkAndClockInfoParams;
|
||||
NvBool bIsNvlinkReady = NV_TRUE;
|
||||
|
||||
//
|
||||
// vGPU:
|
||||
@ -288,28 +287,9 @@ subdeviceCtrlCmdBusGetNvlinkStatus_IMPL
|
||||
{
|
||||
// Get the nvlink connections for this device from the core
|
||||
knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink);
|
||||
|
||||
|
||||
//
|
||||
// Get the nvlink connections for this device from the core
|
||||
// If the function fails then the corelib doesn't have enough
|
||||
// info to validate connectivity so we should mark the API call
|
||||
// as not ready
|
||||
//
|
||||
status = knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink);
|
||||
if (status == NV_ERR_NOT_READY)
|
||||
{
|
||||
NV_PRINTF(LEVEL_INFO, "Nvlink is not ready yet!\n");
|
||||
bIsNvlinkReady = NV_FALSE;
|
||||
}
|
||||
else if (status != NV_OK)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
// If nvlink is not ready don't report back any links as being enabled
|
||||
pParams->enabledLinkMask = (bIsNvlinkReady) ? pKernelNvlink->enabledLinks : 0x0;
|
||||
pParams->enabledLinkMask = pKernelNvlink->enabledLinks;
|
||||
|
||||
r = pParams->enabledLinkMask;
|
||||
while (r >>= 1 ) i++;
|
||||
|
@ -75,13 +75,16 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
|
||||
|
||||
#if defined(INCLUDE_NVLINK_LIB)
|
||||
|
||||
OBJSYS *pSys = SYS_GET_INSTANCE();
|
||||
NvU32 flags = NVLINK_STATE_CHANGE_SYNC;
|
||||
NvBool bNvswitchProxyPresent = NV_FALSE;
|
||||
NvBool bUpdateConnStatus = NV_FALSE;
|
||||
NvBool bCheckDegradedMode = NV_FALSE;
|
||||
OBJSYS *pSys = SYS_GET_INSTANCE();
|
||||
NvU32 flags = NVLINK_STATE_CHANGE_SYNC;
|
||||
NvBool bNvswitchProxyPresent = NV_FALSE;
|
||||
NvBool bUpdateConnStatus = NV_FALSE;
|
||||
NvBool bCheckDegradedMode = NV_FALSE;
|
||||
NvU32 linkId;
|
||||
NvU32 tmpDisabledLinkMask = 0;
|
||||
NvU32 tmpEnabledLinkMask = 0;
|
||||
nvlink_conn_info conn_info;
|
||||
NvU32 linkId;
|
||||
|
||||
NvU32 numActiveLinksPerIoctrl = 0;
|
||||
NvU32 numLinksPerIoctrl = 0;
|
||||
|
||||
@ -91,6 +94,18 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
    //
    if (!knvlinkPoweredUpForD3_HAL(pGpu, pKernelNvlink))
    {
        if (pKernelNvlink->bEnableAli)
        {
            // Update the post Rx Det link Mask for the GPU
            knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
        }

        if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40)
        {
            numActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
            numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
        }

        //
        // Optimization: Check for nvlink proxy only when system fabric is externally
        // managed. This would avoid RPCs in non-nvswitch cases.
@ -100,26 +115,115 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
        bNvswitchProxyPresent = knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink);
    }

    if (pKernelNvlink->bEnableAli)
    {
        // Update the post Rx Det link Mask for the GPU
        knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
    }

    //
    // If on NVLink 4.0+, then before topology discovery is performed,
    // first enter the corelib floorsweeping function to floorsweep down
    // the GPU if requested.
    // This path does not cache connection info, since its main purpose is to
    // edit the connection information before RM tries to cache and update itself.
    //
    if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40 &&
        !bNvswitchProxyPresent &&
        !pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED) &&
        pKernelNvlink->pNvlinkDev != NULL)
        pKernelNvlink->pNvlinkDev != NULL &&
        !pKernelNvlink->bFloorSwept &&
        IS_SILICON(pGpu) &&
        numActiveLinksPerIoctrl < numLinksPerIoctrl)
    {
        numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
        status = knvlinkFloorSweep(pGpu, pKernelNvlink,
                                   numLinksPerIoctrl, &numActiveLinksPerIoctrl);
        // What matters here is taking the discovery path, not the connection info it returns
        FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
        {
            nvlink_lib_discover_and_get_remote_conn_info(
                pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
        }
        FOR_EACH_INDEX_IN_MASK_END;

        // Floorsweeping in the corelib will update connection info that RM will query below
        (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);

        //
        // If a link in the enabledLinkMask is not trained after floorsweeping,
        // then add it to a tmp disabled linkMask
        //

        // Get the link train status for the enabled link masks
        NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

        portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
        linkTrainedParams.linkMask = pKernelNvlink->enabledLinks;
        linkTrainedParams.bActiveOnly = NV_TRUE;

        // Reset timeout to clear any accumulated timeouts from link init
        if (IS_GSP_CLIENT(pGpu))
        {
            threadStateResetTimeout(pGpu);
        }

        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                     (void *)&linkTrainedParams,
                                     sizeof(linkTrainedParams));
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to floorsweep valid nvlink config!\n");
            return NV_ERR_NOT_READY;
            NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
            return status;
        }

        //
        // Create a temporary mask of all links that are now enabled,
        // i.e. links classified as active
        //
        FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
        {
            if (linkTrainedParams.bIsLinkActive[linkId])
            {
                tmpEnabledLinkMask |= BIT(linkId);
            }
            else
            {
                tmpDisabledLinkMask |= BIT(linkId);
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;

        // Redo linkMasks based on the search above being the ground truth
        pKernelNvlink->enabledLinks = tmpEnabledLinkMask;

        //
        // Remove any links not active in the tmpEnabledLinkMask from all
        // other link masks, as these have been floorswept by the corelib
        //
        pKernelNvlink->disconnectedLinkMask = tmpEnabledLinkMask;
        pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;

        status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
        if (status != NV_OK)
        {
            NV_ASSERT(status == NV_OK);
            return status;
        }

        // Re-sync the link masks with GSP
        status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
        if (status != NV_OK)
        {
            NV_ASSERT(status == NV_OK);
            return status;
        }

        NV_PRINTF(LEVEL_INFO,
                  "Post Floorsweeping: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n",
                  pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);
        pKernelNvlink->bFloorSwept = NV_TRUE;

        //
        // Assert that the number of links in active is always less than
        // or equal to the number of active links on the chips
        //
        NV_ASSERT_OR_ELSE_STR((nvPopCount32(tmpEnabledLinkMask) <= numActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)),
                              "Mismatch between links in active and #of links supported!\n", return NV_ERR_INVALID_STATE);
    }

    // We only need to look at links that are still considered disconnected
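The floorsweeping block above boils down to bit-mask bookkeeping: query the per-link trained state, split the enabled mask into active and inactive subsets, then sanity-check the active count against the per-IOCTRL budget with a popcount. Below is a standalone sketch of that pattern in plain C; `isLinkActive` is a hypothetical stand-in for `linkTrainedParams.bIsLinkActive`, and all numbers are made up for illustration.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(i) (1u << (i))

/* Count set bits, as nvPopCount32 does. */
static unsigned popcount32(uint32_t v)
{
    unsigned n = 0;
    for (; v != 0; v &= v - 1)
        n++;
    return n;
}

int main(void)
{
    uint32_t enabledLinks = 0x0F;                         /* links 0-3 enabled before floorsweeping */
    bool isLinkActive[32] = { true, true, false, true };  /* hypothetical per-link trained status   */
    uint32_t tmpEnabledLinkMask = 0, tmpDisabledLinkMask = 0;

    /* Partition the enabled mask by trained state (mirrors the FOR_EACH_INDEX_IN_MASK loop). */
    for (uint32_t linkId = 0; linkId < 32; linkId++)
    {
        if (!(enabledLinks & BIT(linkId)))
            continue;
        if (isLinkActive[linkId])
            tmpEnabledLinkMask |= BIT(linkId);
        else
            tmpDisabledLinkMask |= BIT(linkId);
    }

    /* Sanity check: active links must not exceed the allowed links per IOCTRL. */
    uint32_t numActiveLinksPerIoctrl = 3, numIoctrls = 1;
    bool ok = popcount32(tmpEnabledLinkMask) <= numActiveLinksPerIoctrl * numIoctrls;

    printf("active 0x%x disabled 0x%x check %s\n",
           tmpEnabledLinkMask, tmpDisabledLinkMask, ok ? "pass" : "fail");
    return 0;
}
```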
@ -438,7 +542,7 @@ knvlinkCheckTrainingIsComplete_IMPL
    {
        if (pKernelNvlink0->bLinkTrainingDebugSpew)
        {
            NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
            NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
        }
        return NV_ERR_GENERIC;
    }
@ -495,7 +599,7 @@ knvlinkCheckTrainingIsComplete_IMPL
    {
        if (pKernelNvlink1->bLinkTrainingDebugSpew)
        {
            NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
            NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
        }

        return NV_ERR_GENERIC;
@ -1279,140 +1383,6 @@ knvlinkRetrainLink_IMPL
    return status;
}

/*!
 * @brief Floorsweep the nvlink config for the chip
 *
 * @param[in]  pGpu                       OBJGPU pointer
 * @param[in]  pKernelNvlink              KernelNvlink pointer
 * @param[in]  numLinksPerIoctrl          number of total links found in discovery
 * @param[out] pNumActiveLinksPerIoctrl   number of links needed to be active
 *
 * @returns On success, floorsweeps the nvlink config and returns NV_OK.
 *          On failure, returns NV_ERR_XXX.
 */
NV_STATUS
knvlinkFloorSweep_IMPL
(
    OBJGPU *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32 numLinksPerIoctrl,
    NvU32 *pNumActiveLinksPerIoctrl
)
{
#if defined(INCLUDE_NVLINK_LIB)
    NV_STATUS status = NV_OK;
    NvU32 linkId;
    NvU32 tmpDisabledLinkMask = 0;
    NvU32 tmpEnabledLinkMask = 0;
    nvlink_conn_info conn_info;

    *pNumActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
    if (!knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, *pNumActiveLinksPerIoctrl, numLinksPerIoctrl))
    {
        return NV_OK;
    }

    // What matters here is taking the discovery path, not the connection info it returns
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        nvlink_lib_discover_and_get_remote_conn_info(
            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Floorsweeping in the corelib will update connection info that RM will query below
    (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);

    //
    // If a link in the enabledLinkMask is not trained after floorsweeping,
    // then add it to a tmp disabled linkMask
    //

    // Get the link train status for the enabled link masks
    NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask = pKernelNvlink->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu))
    {
        threadStateResetTimeout(pGpu);
    }

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    //
    // Create a temporary mask of all links that are now enabled,
    // i.e. links classified as active
    //
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        if (linkTrainedParams.bIsLinkActive[linkId])
        {
            tmpEnabledLinkMask |= BIT(linkId);
        }
        else
        {
            tmpDisabledLinkMask |= BIT(linkId);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Redo linkMasks based on the search above being the ground truth
    pKernelNvlink->enabledLinks = tmpEnabledLinkMask;

    //
    // Remove any links not active in the tmpEnabledLinkMask from all
    // other link masks, as these have been floorswept by the corelib
    //
    pKernelNvlink->disconnectedLinkMask = tmpEnabledLinkMask;
    pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;

    status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    // Re-sync the link masks with GSP
    status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    //
    // Assert that the number of links in active is always less than
    // or equal to the number of active links on the chips
    //
    if (!(nvPopCount32(tmpEnabledLinkMask) <= *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Floorsweeping didn't work! enabledMaskCount: 0x%x and numActiveLinksTotal: 0x%x. Current link info cached in SW: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n",
                  nvPopCount32(tmpEnabledLinkMask), *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask), pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);

        return NV_ERR_NOT_READY;
    }

    pKernelNvlink->bFloorSwept = NV_TRUE;
#endif //INCLUDE_NVLINK_LIB
    return NV_OK;
}
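The knvlinkFloorSweep_IMPL helper above takes the total per-IOCTRL link count found in discovery and reports the required active count through its out-parameter; the same call shape is visible in the knvlinkCoreGetRemoteDeviceInfo hunk earlier. Below is a minimal sketch of that calling convention in plain C, using a hypothetical `floor_sweep()` stand-in rather than the real RM types.

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical stand-in for the helper: takes the total number of links per
 * IOCTRL found in discovery, reports through the out-parameter how many must
 * remain active, and returns 0 on success.
 */
static int floor_sweep(uint32_t numLinksPerIoctrl, uint32_t *pNumActiveLinksPerIoctrl)
{
    *pNumActiveLinksPerIoctrl = 2;   /* pretend the platform wants 2 active links */

    if (*pNumActiveLinksPerIoctrl >= numLinksPerIoctrl)
        return 0;                    /* nothing to floorsweep */

    /* ... corelib discovery, link powerdown, and mask rebuild would happen here ... */
    return 0;
}

int main(void)
{
    uint32_t numLinksPerIoctrl = 4;        /* total links found in discovery */
    uint32_t numActiveLinksPerIoctrl = 0;  /* filled in by the helper */

    if (floor_sweep(numLinksPerIoctrl, &numActiveLinksPerIoctrl) != 0)
    {
        fprintf(stderr, "floorsweeping failed\n");
        return 1;
    }

    printf("%u of %u links per IOCTRL remain active\n",
           numActiveLinksPerIoctrl, numLinksPerIoctrl);
    return 0;
}
```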

/*!
 * @brief Retrain the link from OFF state
 *
@ -758,8 +758,6 @@ NvBool gpumgrIsDeviceRmFirmwareCapable
        0x2236, // A10  SKU215 Pris-24
        0x2237, // A10G SKU215 Pris-24
        0x25B6, // A16
        0x20F5, // A800-80
        0x20F6, // A800-40
    };
    NvU32 count = NV_ARRAY_ELEMENTS(defaultGspRmGpus);
    NvU32 i;

@ -1,4 +1,4 @@
NVIDIA_VERSION = 520.61.05
NVIDIA_VERSION = 520.56.06

# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))