520.56.06

This commit is contained in:
Andy Ritger 2022-10-12 10:30:46 -07:00
parent 90eb10774f
commit 7c345b838b
No known key found for this signature in database
GPG Key ID: 6D466BB75E006CFC
40 changed files with 317 additions and 539 deletions

View File

@ -2,6 +2,12 @@
## Release 520 Entries
### [520.56.06] 2022-10-12
#### Added
- Introduce support for GeForce RTX 4090 GPUs.
### [520.61.05] 2022-10-10
#### Added

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.61.05.
version 520.56.06.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with gsp.bin
firmware and user-space NVIDIA GPU driver components from a corresponding
520.61.05 driver release. This can be achieved by installing
520.56.06 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.61.05 release,
(see the table below). However, in the 520.56.06 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.61.05/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -685,6 +685,7 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |
@ -709,6 +710,7 @@ Subsystem Device ID.
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |
@ -736,6 +738,7 @@ Subsystem Device ID.
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
@ -751,6 +754,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.61.05\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"
EXTRA_CFLAGS += -Wno-unused-function

View File

@ -78,8 +78,13 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
#if defined(NVCPU_AARCH64)
#if defined(NV_MT_DEVICE_GRE_PRESENT)
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_GRE))
#else
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
PTE_ATTRINDX(MT_DEVICE_nGnRE))
#endif
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)

View File

@ -636,33 +636,27 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
#define NV_RM_MAX_MSIX_LINES 8

View File

@ -2334,6 +2334,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
;;
mt_device_gre)
#
# Determine if MT_DEVICE_GRE flag is present.
#
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
# (2021-06-01).
#
CODE="
#include <asm/memory.h>
unsigned int conftest_mt_device_gre(void) {
return MT_DEVICE_GRE;
}"
compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
;;
get_user_pages)
#
# Conftest for get_user_pages()

View File

@ -431,7 +431,7 @@ static int nvidia_mmap_numa(
const nv_alloc_mapping_context_t *mmap_context)
{
NvU64 start, addr;
NvU64 pages;
unsigned int pages;
NvU64 i;
pages = NV_VMA_SIZE(vma) >> PAGE_SHIFT;

View File

@ -1429,9 +1429,6 @@ static int nv_open_device(nv_state_t *nv, nvidia_stack_t *sp)
return -ENODEV;
}
if (unlikely(NV_ATOMIC_READ(nvl->usage_count) >= NV_S32_MAX))
return -EMFILE;
if ( ! (nv->flags & NV_FLAG_OPEN))
{
/* Sanity check: !NV_FLAG_OPEN requires usage_count == 0 */

View File

@ -243,6 +243,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vmalloc_has_pgprot_t_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_channel_state
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_dev_has_ats_enabled
NV_CONFTEST_TYPE_COMPILE_TESTS += mt_device_gre
NV_CONFTEST_TYPE_COMPILE_TESTS += remove_memory_has_nid_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += add_memory_driver_managed_has_mhp_flags_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2015-2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -207,6 +207,8 @@ static int nvlink_fops_release(struct inode *inode, struct file *filp)
nvlink_print(NVLINK_DBG_INFO, "nvlink driver close\n");
WARN_ON(private == NULL);
mutex_lock(&nvlink_drvctx.lock);
if (private->capability_fds.fabric_mgmt > 0)

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r521_82
#define NV_BUILD_BRANCH r521_90
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r521_82
#define NV_PUBLIC_BRANCH r521_90
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r520/r521_82-338"
#define NV_BUILD_CHANGELIST_NUM (31864828)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r520/r521_90-315"
#define NV_BUILD_CHANGELIST_NUM (31900380)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r520/r521_82-338"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31864828)
#define NV_BUILD_NAME "rel/gpu_drv/r520/r521_90-315"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31900380)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r521_82-4"
#define NV_BUILD_CHANGELIST_NUM (31858738)
#define NV_BUILD_BRANCH_VERSION "r521_90-15"
#define NV_BUILD_CHANGELIST_NUM (31900380)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "522.06"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31858738)
#define NV_BUILD_NAME "522.25"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (31900380)
#define NV_BUILD_BRANCH_BASE_VERSION R520
#endif
// End buildmeister python edited section

View File

@ -136,6 +136,7 @@ static const PNPVendorId PNPVendorIds[] =
{ "CLO", _VENDOR_NAME_ENTRY("Clone Computers/Analogy") },
{ "CLT", _VENDOR_NAME_ENTRY("automated computer control systems")},
{ "CMD", _VENDOR_NAME_ENTRY("CMD Technology") },
{ "CMN", _VENDOR_NAME_ENTRY("Chimei innolux corp.") },
{ "CMO", _VENDOR_NAME_ENTRY("Chi Mei Optoelectronics corp.") },
{ "CNI", _VENDOR_NAME_ENTRY("Connect International") },
{ "CNT", _VENDOR_NAME_ENTRY("CNet Technology") },

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "520.61.05"
#define NV_VERSION_STRING "520.56.06"
#else

View File

@ -89,7 +89,7 @@ typedef struct
typedef struct
{
NvU16 nodeId;
NvU16 linkIndex;
NvU32 linkIndex;
nvlink_pci_dev_info pciInfo;
} nvlink_endpoint;
@ -117,7 +117,7 @@ typedef struct
typedef struct
{
NvU16 nodeId;
NvU16 linkIndex;
NvU32 linkIndex;
nvlink_pci_dev_info pciInfo;
NvU8 devUuid[NVLINK_UUID_LEN];
NvU32 devType;
@ -189,9 +189,9 @@ typedef enum
/* link and sublink state of an nvlink endpoint */
typedef struct
{
NvU8 linkMode;
NvU8 txSubLinkMode;
NvU8 rxSubLinkMode;
NvU32 linkMode;
NvU32 txSubLinkMode;
NvU32 rxSubLinkMode;
} nvlink_link_state;
/*
@ -354,7 +354,7 @@ typedef struct
*/
typedef struct
{
NvU16 linkIndex;
NvU32 linkIndex;
NvBool initStatus;
} nvlink_link_init_status;
@ -503,7 +503,7 @@ typedef struct
*/
typedef struct
{
NvU16 linkIndex;
NvU32 linkIndex;
NV_DECLARE_ALIGNED(NvU64 tokenValue, 8);
} nvlink_token_info;
@ -1111,11 +1111,6 @@ typedef struct
NvU32 endStatesCount;
} nvlink_get_device_link_states;
/*
* Note: Verify that new parameter structs for IOCTLs satisfy
* sizing restrictions for all OSs they could be used in.
*/
#define CTRL_NVLINK_CHECK_VERSION 0x01
#define CTRL_NVLINK_SET_NODE_ID 0x02
#define CTRL_NVLINK_SET_TX_COMMON_MODE 0x03

View File

@ -78,18 +78,7 @@ nvlink_core_check_link_state
return NV_FALSE;
}
status = link->link_handlers->get_dl_link_mode(link, &crntDlLinkMode);
if (status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Unable to get DL link mode for %s:%s\n",
__FUNCTION__, link->dev->deviceName, link->linkName));
return NV_FALSE;
}
if (crntTlLinkMode == NVLINK_LINKSTATE_HS &&
(crntDlLinkMode == NVLINK_LINKSTATE_HS ||
crntDlLinkMode == NVLINK_LINKSTATE_SLEEP))
if (crntTlLinkMode == NVLINK_LINKSTATE_HS)
{
return NV_TRUE;
}

View File

@ -430,7 +430,7 @@ nvlink_core_powerdown_intranode_conns_from_active_to_off
// to track Failure
conns[i]->end0->inSWCFG = NV_FALSE;
}
}
else
{
conns[i]->end0->inSWCFG = NV_TRUE;
@ -448,14 +448,14 @@ nvlink_core_powerdown_intranode_conns_from_active_to_off
// to track Failure
conns[i]->end1->inSWCFG = NV_FALSE;
}
}
else
{
conns[i]->end1->inSWCFG = NV_TRUE;
}
// Change each sublink state to SAFE
if(conns[i]->end0->inSWCFG == NV_TRUE)
if(conns[i]->end0->inSWCFG == NV_TRUE)
{
conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0,
NVLINK_SUBLINK_STATE_TX_SAFE,
@ -928,8 +928,7 @@ nvlink_core_powerdown_floorswept_conns_to_off
{
nvlink_core_get_intranode_conn(links[j], &(conn));
if (conn == NULL ||
_nvlink_core_check_if_conn_in_array(visitedConns, connCount, conn) ||
(conn->end0 == NULL || conn->end1 == NULL))
_nvlink_core_check_if_conn_in_array(visitedConns, connCount, conn))
{
continue;
}

View File

@ -535,7 +535,6 @@ void nvswitch_reset_persistent_link_hw_state(nvswitch_device *device, NvU32
void nvswitch_store_topology_information(nvswitch_device *device, nvlink_link *link);
NvlStatus nvswitch_launch_ALI(nvswitch_device *device);
NvlStatus nvswitch_launch_ALI_link_training(nvswitch_device *device, nvlink_link *link, NvBool bSync);
NvlStatus nvswitch_inband_read_data(nvswitch_device *device, NvU8 *dest, NvU32 linkId, NvU32 *dataSize);
void nvswitch_filter_messages(nvswitch_device *device, NvU32 linkId);
NvlStatus nvswitch_set_training_mode(nvswitch_device *device);

View File

@ -210,7 +210,7 @@
#define NVSWITCH_HAL_FUNCTION_LIST_LS10(_op, _arch) \
_op(NvlStatus, nvswitch_launch_ALI, (nvswitch_device *device), _arch) \
_op(NvlStatus, nvswitch_launch_ALI_link_training, (nvswitch_device *device, nvlink_link *link, NvBool bSync), _arch) \
_op(NvlStatus, nvswitch_launch_ALI_link_training, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(NvlStatus, nvswitch_ctrl_inband_send_data, (nvswitch_device *device, NVSWITCH_INBAND_SEND_DATA_PARAMS *p), _arch) \
_op(NvlStatus, nvswitch_ctrl_inband_read_data, (nvswitch_device *device, NVSWITCH_INBAND_READ_DATA_PARAMS *p), _arch) \
_op(NvlStatus, nvswitch_ctrl_set_residency_bins, (nvswitch_device *device, NVSWITCH_SET_RESIDENCY_BINS *p), _arch) \

View File

@ -648,7 +648,7 @@ NvlStatus nvswitch_ctrl_get_nvlink_lp_counters_lr10(nvswitch_device *device, NVS
NvlStatus nvswitch_service_nvldl_fatal_link_lr10(nvswitch_device *device, NvU32 nvliptInstance, NvU32 link);
NvlStatus nvswitch_ctrl_inband_send_data_lr10(nvswitch_device *device, NVSWITCH_INBAND_SEND_DATA_PARAMS *p);
NvlStatus nvswitch_ctrl_inband_read_data_lr10(nvswitch_device *device, NVSWITCH_INBAND_READ_DATA_PARAMS *p);
NvlStatus nvswitch_launch_ALI_link_training_lr10(nvswitch_device *device, nvlink_link *link, NvBool bSync);
NvlStatus nvswitch_launch_ALI_link_training_lr10(nvswitch_device *device, nvlink_link *link);
NvlStatus nvswitch_service_minion_link_lr10(nvswitch_device *device, NvU32 nvliptInstance);
void nvswitch_apply_recal_settings_lr10(nvswitch_device *device, nvlink_link *link);
NvlStatus nvswitch_ctrl_get_sw_info_lr10(nvswitch_device *device, NVSWITCH_GET_SW_INFO_PARAMS *p);

View File

@ -2047,8 +2047,7 @@ NvlStatus
nvswitch_launch_ALI_link_training_lr10
(
nvswitch_device *device,
nvlink_link *link,
NvBool bSync
nvlink_link *link
)
{
return NVL_ERR_NOT_IMPLEMENTED;

View File

@ -307,7 +307,7 @@ _nvswitch_corelib_ali_training
)
{
nvswitch_device *device = link->dev->pDevInfo;
return device->hal.nvswitch_launch_ALI_link_training(device, link, NV_FALSE);
return device->hal.nvswitch_launch_ALI_link_training(device, link);
}
void
@ -4191,11 +4191,10 @@ NvlStatus
nvswitch_launch_ALI_link_training
(
nvswitch_device *device,
nvlink_link *link,
NvBool bSync
nvlink_link *link
)
{
return device->hal.nvswitch_launch_ALI_link_training(device, link, bSync);
return device->hal.nvswitch_launch_ALI_link_training(device, link);
}
NvlStatus

View File

@ -583,25 +583,4 @@ typedef struct NV208F_CTRL_FB_CLEAR_REMAPPED_ROWS_PARAMS {
NvU32 sourceMask;
} NV208F_CTRL_FB_CLEAR_REMAPPED_ROWS_PARAMS;
/*
* NV208F_CTRL_CMD_FB_GET_FLOORSWEPT_FBPA_MASK
*
* This command calculates the floorswept fbpa mask by taking 1/2 HBM
* floorsweeping into account
*
* fbpaMask
* This value of the mask.
*
* Possible status values returned are:
* NV_OK
* NV_ERR_NOT_SUPPORTED
*/
#define NV208F_CTRL_CMD_FB_GET_FLOORSWEPT_FBPA_MASK (0x208f0516) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_DIAG_FB_INTERFACE_ID << 8) | NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS_MESSAGE_ID" */
#define NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS_MESSAGE_ID (0x16U)
typedef struct NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS {
NvU32 fbpaMask;
} NV208F_CTRL_FB_GET_FLOORSWEPT_FBPA_MASK_PARAMS;
/* _ctrl208ffb_h_ */

View File

@ -5200,11 +5200,13 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
const NVHwModeViewPortEvo *pViewPortMin,
const NVHwModeViewPortEvo *pViewPort,
const NVHwModeViewPortEvo *pViewPortMax,
NVEvoUpdateState *updateState)
NVEvoUpdateState *updateState,
NvU32 setWindowUsageBounds)
{
const NVEvoCapabilitiesPtr pEvoCaps = &pDevEvo->gpus[0].capabilities;
NVEvoChannelPtr pChannel = pDevEvo->core;
struct NvKmsScalingUsageBounds scalingUsageBounds = { };
NvU32 win;
/* These methods should only apply to a single pDpy */
nvAssert(pDevEvo->subDevMaskStackDepth > 0);
@ -5250,6 +5252,35 @@ static NvBool EvoSetViewportInOut3(NVDevEvoPtr pDevEvo, const int head,
DRF_NUM(C37D, _HEAD_SET_MAX_OUTPUT_SCALE_FACTOR, _VERTICAL,
scalingUsageBounds.maxVDownscaleFactor));
/*
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
* for each window that is attached to the head.
*
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
* this cropped size back to the input surface domain, and isohub will fetch
* this cropped size. This function assumes that there's no window scaling yet,
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
*
* Program MAX_PIXELS_FETCHED_PER_LINE for each window that is attached to
* head. For Turing+, SetScalingUsageBoundsOneWindow5() will take care of
* programming window usage bounds only for the layers/windows in use.
*/
setWindowUsageBounds |=
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
NV_EVO_SCALE_FACTOR_1X));
for (win = 0; win < pDevEvo->numWindows; win++) {
if (head != pDevEvo->headForWindow[win]) {
continue;
}
nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1);
nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds);
}
return scalingUsageBounds.vUpscalingAllowed;
}
@ -5260,11 +5291,10 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 win;
NvU32 setWindowUsageBounds = NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3;
NvBool verticalUpscalingAllowed =
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
pViewPortMax, updateState);
pViewPortMax, updateState,
NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C3);
nvDmaSetStartEvoMethod(pChannel,
NVC37D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);
@ -5274,34 +5304,6 @@ static void EvoSetViewportInOutC3(NVDevEvoPtr pDevEvo, const int head,
(verticalUpscalingAllowed ?
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _TRUE) :
DRF_DEF(C37D, _HEAD_SET_HEAD_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE)));
/*
* Program MAX_PIXELS_FETCHED_PER_LINE window usage bounds
* for each window that is attached to the head.
*
* Precomp will clip the post-scaled window to the input viewport, reverse-scale
* this cropped size back to the input surface domain, and isohub will fetch
* this cropped size. This function assumes that there's no window scaling yet,
* so the MAX_PIXELS_FETCHED_PER_LINE will be bounded by the input viewport
* width. SetScalingUsageBoundsOneWindow5() will take care of updating
* MAX_PIXELS_FETCHED_PER_LINE, if window scaling is enabled later.
* On Volta, Program for each window that is attached to head. For turing+,
* SetScalingUsageBoundsOneWindow5() will take care of programming window
* usage bounds only for the layers/windows in use.
*/
setWindowUsageBounds |=
DRF_NUM(C37D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _MAX_PIXELS_FETCHED_PER_LINE,
GetMaxPixelsFetchedPerLine(pViewPort->in.width,
NV_EVO_SCALE_FACTOR_1X));
for (win = 0; win < pDevEvo->numWindows; win++) {
if (head != pDevEvo->headForWindow[win]) {
continue;
}
nvDmaSetStartEvoMethod(pChannel, NVC37D_WINDOW_SET_WINDOW_USAGE_BOUNDS(win), 1);
nvDmaSetEvoMethodData(pChannel, setWindowUsageBounds);
}
}
static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
@ -5311,9 +5313,13 @@ static void EvoSetViewportInOutC5(NVDevEvoPtr pDevEvo, const int head,
NVEvoUpdateState *updateState)
{
NVEvoChannelPtr pChannel = pDevEvo->core;
NvU32 setWindowUsageBounds =
(NV_EVO3_DEFAULT_WINDOW_USAGE_BOUNDS_C5 |
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _INPUT_SCALER_TAPS, _TAPS_2) |
DRF_DEF(C57D, _WINDOW_SET_WINDOW_USAGE_BOUNDS, _UPSCALING_ALLOWED, _FALSE));
NvU32 verticalUpscalingAllowed =
EvoSetViewportInOut3(pDevEvo, head, pViewPortMin, pViewPort,
pViewPortMax, updateState);
pViewPortMax, updateState, setWindowUsageBounds);
nvDmaSetStartEvoMethod(pChannel,
NVC57D_HEAD_SET_HEAD_USAGE_BOUNDS(head), 1);

View File

@ -636,33 +636,27 @@ typedef enum
#define NV_GET_NV_STATE(pGpu) \
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
}
#define IS_REG_OFFSET(nv, offset, length) \
(((offset) >= (nv)->regs->cpu_address) && \
(((offset) + ((length)-1)) <= \
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
}
#define IS_FB_OFFSET(nv, offset, length) \
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
}
#define IS_UD_OFFSET(nv, offset, length) \
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
((offset) >= (nv)->ud.cpu_address) && \
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
}
#define IS_IMEM_OFFSET(nv, offset, length) \
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
(((offset) + ((length) - 1)) <= \
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
#define NV_RM_MAX_MSIX_LINES 8

View File

@ -780,8 +780,10 @@ static NV_STATUS RmAccessRegistry(
RmStatus = NV_ERR_INVALID_STRING_LENGTH;
goto done;
}
// get access to client's parmStr
RMAPI_PARAM_COPY_INIT(parmStrParamCopy, tmpParmStr, clientParmStrAddress, ParmStrLength, 1);
parmStrParamCopy.flags |= RMAPI_PARAM_COPY_FLAGS_ZERO_BUFFER;
RmStatus = rmapiParamsAcquire(&parmStrParamCopy, NV_TRUE);
if (RmStatus != NV_OK)
{
@ -2027,7 +2029,6 @@ static NV_STATUS RmGetAllocPrivate(
PMEMORY_DESCRIPTOR pMemDesc;
NvU32 pageOffset;
NvU64 pageCount;
NvU64 endingOffset;
RsResourceRef *pResourceRef;
RmResource *pRmResource;
void *pMemData;
@ -2088,9 +2089,8 @@ static NV_STATUS RmGetAllocPrivate(
if (rmStatus != NV_OK)
goto done;
endingOffset = pageOffset + length;
pageCount = (endingOffset / os_page_size);
pageCount += (*pPageIndex + ((endingOffset % os_page_size) ? 1 : 0));
pageCount = ((pageOffset + length) / os_page_size);
pageCount += (*pPageIndex + (((pageOffset + length) % os_page_size) ? 1 : 0));
if (pageCount > NV_RM_PAGES_TO_OS_PAGES(pMemDesc->PageCount))
{

View File

@ -638,15 +638,6 @@ osInitNvMapping(
sysApplyLockingPolicy(pSys);
pGpu->busInfo.IntLine = nv->interrupt_line;
//
// Set the DMA address size as soon as we have the HAL to call to
// determine the precise number of physical address bits supported
// by the architecture. DMA allocations should not be made before
// this point.
//
nv_set_dma_address_size(nv, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM));
pGpu->dmaStartAddress = (RmPhysAddr)nv_get_dma_start_address(nv);
if (nv->fb != NULL)
{
@ -735,6 +726,15 @@ osTeardownScalability(
return clTeardownPcie(pGpu, pCl);
}
static inline void
RmSetDeviceDmaAddressSize(
nv_state_t *nv,
NvU8 numDmaAddressBits
)
{
nv_set_dma_address_size(nv, numDmaAddressBits);
}
static void
populateDeviceAttributes(
OBJGPU *pGpu,
@ -884,6 +884,8 @@ RmInitNvDevice(
return;
}
RmSetDeviceDmaAddressSize(nv, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM));
os_disable_console_access();
status->rmStatus = gpumgrStateInitGpu(pGpu);
@ -1187,7 +1189,7 @@ NvBool RmInitPrivateState(
// Set up a reasonable default DMA address size, based on the minimum
// possible on currently supported GPUs.
//
nv_set_dma_address_size(pNv, NV_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
RmSetDeviceDmaAddressSize(pNv, NV_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
os_mem_set(nvp, 0, sizeof(*nvp));
nvp->status = NV_ERR_INVALID_STATE;
@ -1581,7 +1583,7 @@ NvBool RmInitAdapter(
//
if (nv->request_firmware)
{
nv_set_dma_address_size(nv, NV_GSP_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
RmSetDeviceDmaAddressSize(nv, NV_GSP_GPU_MIN_SUPPORTED_DMA_ADDR_WIDTH);
gspFwHandle = nv_get_firmware(nv, NV_FIRMWARE_GSP,
&gspFw.pBuf,

View File

@ -214,17 +214,6 @@ void __nvoc_init_dataField_KernelNvlink(KernelNvlink *pThis, RmHalspecOwner *pRm
pThis->setProperty(pThis, PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_REFCNT, ((NvBool)(0 == 0)));
}
// NVOC Property Hal field -- PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x08000000UL) )) /* ChipHal: GH100 */
{
pThis->setProperty(pThis, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK, ((NvBool)(0 == 0)));
}
// default
else
{
pThis->setProperty(pThis, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK, ((NvBool)(0 != 0)));
}
// NVOC Property Hal field -- PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x0870fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | GH100 */
{
@ -646,22 +635,6 @@ static void __nvoc_init_funcTable_KernelNvlink_1(KernelNvlink *pThis, RmHalspecO
}
}
// Hal function -- knvlinkIsFloorSweepingNeeded
if (0)
{
}
else if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* RmVariantHal: PF_KERNEL_ONLY */
{
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x08000000UL) )) /* ChipHal: GH100 */
{
pThis->__knvlinkIsFloorSweepingNeeded__ = &knvlinkIsFloorSweepingNeeded_GH100;
}
else if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x0070ffe0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 | GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 */
{
pThis->__knvlinkIsFloorSweepingNeeded__ = &knvlinkIsFloorSweepingNeeded_491d52;
}
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelNvlink_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelNvlink_engstateStatePreInitLocked;

View File

@ -218,7 +218,6 @@ struct KernelNvlink {
NV_STATUS (*__knvlinkIsAliSupported__)(OBJGPU *, struct KernelNvlink *);
NV_STATUS (*__knvlinkPostSetupNvlinkPeer__)(OBJGPU *, struct KernelNvlink *);
NV_STATUS (*__knvlinkDiscoverPostRxDetLinks__)(OBJGPU *, struct KernelNvlink *, OBJGPU *);
NvBool (*__knvlinkIsFloorSweepingNeeded__)(OBJGPU *, struct KernelNvlink *, NvU32, NvU32);
NV_STATUS (*__knvlinkReconcileTunableState__)(POBJGPU, struct KernelNvlink *, void *);
NV_STATUS (*__knvlinkStateInitLocked__)(POBJGPU, struct KernelNvlink *);
NV_STATUS (*__knvlinkStatePreLoad__)(POBJGPU, struct KernelNvlink *, NvU32);
@ -237,7 +236,6 @@ struct KernelNvlink {
NvBool PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED;
NvBool PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_SUPPORTED;
NvBool PDB_PROP_KNVLINK_UNSET_NVLINK_PEER_REFCNT;
NvBool PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK;
NvBool PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING;
NvBool PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED;
NvBool PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD;
@ -318,16 +316,12 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelNvlink;
((KernelNvlink*)__nvoc_dynamicCast(staticCast((pThis), Dynamic), classInfo(KernelNvlink)))
#endif //__nvoc_kernel_nvlink_h_disabled
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_CAST
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_NAME PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING
#define PDB_PROP_KNVLINK_MINION_GFW_BOOT_BASE_CAST
#define PDB_PROP_KNVLINK_MINION_GFW_BOOT_BASE_NAME PDB_PROP_KNVLINK_MINION_GFW_BOOT
#define PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK_BASE_CAST
#define PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK_BASE_NAME PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_CAST
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED
#define PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED_BASE_CAST
#define PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED_BASE_NAME PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_CAST
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED
#define PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING_BASE_CAST
#define PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING_BASE_NAME PDB_PROP_KNVLINK_MINION_FORCE_ALI_TRAINING
#define PDB_PROP_KNVLINK_ENABLED_BASE_CAST
@ -342,10 +336,12 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_KernelNvlink;
#define PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED_BASE_NAME PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED
#define PDB_PROP_KNVLINK_IS_MISSING_BASE_CAST __nvoc_base_OBJENGSTATE.
#define PDB_PROP_KNVLINK_IS_MISSING_BASE_NAME PDB_PROP_ENGSTATE_IS_MISSING
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_CAST
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_NAME PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_CAST
#define PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING_BASE_NAME PDB_PROP_KNVLINK_WAR_BUG_3471679_PEERID_FILTERING
#define PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED_BASE_CAST
#define PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED_BASE_NAME PDB_PROP_KNVLINK_SINGLE_LANE_POWER_STATE_ENABLED
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_CAST
#define PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6_BASE_NAME PDB_PROP_KNVLINK_BUG2274645_RESET_FOR_RTD3_FGC6
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD_BASE_CAST
#define PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD_BASE_NAME PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD
#define PDB_PROP_KNVLINK_DECONFIG_HSHUB_ON_NO_MAPPING_BASE_CAST
@ -400,8 +396,6 @@ NV_STATUS __nvoc_objCreate_KernelNvlink(KernelNvlink**, Dynamic*, NvU32);
#define knvlinkPostSetupNvlinkPeer_HAL(pGpu, pKernelNvlink) knvlinkPostSetupNvlinkPeer_DISPATCH(pGpu, pKernelNvlink)
#define knvlinkDiscoverPostRxDetLinks(pGpu, pKernelNvlink, pPeerGpu) knvlinkDiscoverPostRxDetLinks_DISPATCH(pGpu, pKernelNvlink, pPeerGpu)
#define knvlinkDiscoverPostRxDetLinks_HAL(pGpu, pKernelNvlink, pPeerGpu) knvlinkDiscoverPostRxDetLinks_DISPATCH(pGpu, pKernelNvlink, pPeerGpu)
#define knvlinkIsFloorSweepingNeeded(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl) knvlinkIsFloorSweepingNeeded_DISPATCH(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl)
#define knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl) knvlinkIsFloorSweepingNeeded_DISPATCH(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl)
#define knvlinkReconcileTunableState(pGpu, pEngstate, pTunableState) knvlinkReconcileTunableState_DISPATCH(pGpu, pEngstate, pTunableState)
#define knvlinkStateInitLocked(pGpu, pEngstate) knvlinkStateInitLocked_DISPATCH(pGpu, pEngstate)
#define knvlinkStatePreLoad(pGpu, pEngstate, arg0) knvlinkStatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
@ -1074,19 +1068,6 @@ static inline NV_STATUS knvlinkSetUniqueFlaBaseAddress(OBJGPU *pGpu, struct Kern
#define knvlinkSetUniqueFlaBaseAddress_HAL(pGpu, pKernelNvlink, arg0) knvlinkSetUniqueFlaBaseAddress(pGpu, pKernelNvlink, arg0)
NV_STATUS knvlinkFloorSweep_IMPL(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numLinksPerIp, NvU32 *pNumActiveLinks);
#ifdef __nvoc_kernel_nvlink_h_disabled
static inline NV_STATUS knvlinkFloorSweep(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numLinksPerIp, NvU32 *pNumActiveLinks) {
NV_ASSERT_FAILED_PRECOMP("KernelNvlink was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_kernel_nvlink_h_disabled
#define knvlinkFloorSweep(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks) knvlinkFloorSweep_IMPL(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks)
#endif //__nvoc_kernel_nvlink_h_disabled
#define knvlinkFloorSweep_HAL(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks) knvlinkFloorSweep(pGpu, pKernelNvlink, numLinksPerIp, pNumActiveLinks)
static inline NvU64 knvlinkGetUniqueFabricBaseAddress_72249a(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink) {
return pKernelNvlink->fabricBaseAddr;
}
@ -1391,16 +1372,6 @@ static inline NV_STATUS knvlinkDiscoverPostRxDetLinks_DISPATCH(OBJGPU *pGpu, str
return pKernelNvlink->__knvlinkDiscoverPostRxDetLinks__(pGpu, pKernelNvlink, pPeerGpu);
}
static inline NvBool knvlinkIsFloorSweepingNeeded_491d52(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl) {
return ((NvBool)(0 != 0));
}
NvBool knvlinkIsFloorSweepingNeeded_GH100(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl);
static inline NvBool knvlinkIsFloorSweepingNeeded_DISPATCH(OBJGPU *pGpu, struct KernelNvlink *pKernelNvlink, NvU32 numActiveLinksPerIoctrl, NvU32 numLinksPerIoctrl) {
return pKernelNvlink->__knvlinkIsFloorSweepingNeeded__(pGpu, pKernelNvlink, numActiveLinksPerIoctrl, numLinksPerIoctrl);
}
static inline NV_STATUS knvlinkReconcileTunableState_DISPATCH(POBJGPU pGpu, struct KernelNvlink *pEngstate, void *pTunableState) {
return pEngstate->__knvlinkReconcileTunableState__(pGpu, pEngstate, pTunableState);
}

View File

@ -1077,7 +1077,6 @@ void memdescUnmapInternal(OBJGPU *pGpu, MEMORY_DESCRIPTOR *pMemDesc, NvU32 flags
// currently for this, so a WAR is required for r515. The intent
// is to remove this by r525.
//
#define MEMDESC_FLAGS_WSL_SHARED_MEMORY NVBIT64(46)
#endif // _MEMDESC_H_

View File

@ -849,6 +849,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2203, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090 Ti" },
{ 0x2204, 0x0000, 0x0000, "NVIDIA GeForce RTX 3090" },
{ 0x2206, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
{ 0x2207, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti" },
{ 0x2208, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti" },
{ 0x220A, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080" },
{ 0x220D, 0x0000, 0x0000, "NVIDIA CMP 90HX" },
@ -873,6 +874,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2236, 0x1482, 0x10de, "NVIDIA A10" },
{ 0x2237, 0x152f, 0x10de, "NVIDIA A10G" },
{ 0x2238, 0x1677, 0x10de, "NVIDIA A10M" },
{ 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
{ 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
{ 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
{ 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },
@ -900,6 +902,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x24B9, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
{ 0x24BA, 0x0000, 0x0000, "NVIDIA RTX A4500 Laptop GPU" },
{ 0x24BB, 0x0000, 0x0000, "NVIDIA RTX A3000 12GB Laptop GPU" },
{ 0x24C9, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
{ 0x24DC, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Laptop GPU" },
{ 0x24DD, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Laptop GPU" },
{ 0x24E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 3070 Ti Laptop GPU" },
@ -915,6 +918,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2531, 0x151d, 0x103c, "NVIDIA RTX A2000" },
{ 0x2531, 0x151d, 0x10de, "NVIDIA RTX A2000" },
{ 0x2531, 0x151d, 0x17aa, "NVIDIA RTX A2000" },
{ 0x2544, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060" },
{ 0x2560, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Laptop GPU" },
{ 0x2563, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Ti Laptop GPU" },
{ 0x2571, 0x1611, 0x1028, "NVIDIA RTX A2000 12GB" },
@ -944,6 +948,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x25E5, 0x0000, 0x0000, "NVIDIA GeForce RTX 3050 Laptop GPU" },
{ 0x25F9, 0x0000, 0x0000, "NVIDIA RTX A1000 Embedded GPU" },
{ 0x25FA, 0x0000, 0x0000, "NVIDIA RTX A2000 Embedded GPU" },
{ 0x2684, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090" },
{ 0x13BD, 0x11cc, 0x10DE, "GRID M10-0B" },
{ 0x13BD, 0x11cd, 0x10DE, "GRID M10-1B" },
{ 0x13BD, 0x11ce, 0x10DE, "GRID M10-0Q" },

View File

@ -309,7 +309,6 @@ kbusRemoveNvlinkPeerMapping_GP100
NvU32 peerGpuInst = gpuGetInstance(pGpu1);
KernelNvlink *pKernelNvlink0 = GPU_GET_KERNEL_NVLINK(pGpu0);
NvBool bLoopback = (pGpu0 == pGpu1);
NvBool bBufferReady = NV_FALSE;
NV_ASSERT_OR_RETURN(pKernelNvlink0 != NULL, NV_ERR_NOT_SUPPORTED);
@ -416,23 +415,8 @@ kbusRemoveNvlinkPeerMapping_GP100
}
}
//
// Call knvlinkUpdateCurrentConfig to flush settings to the registers
// Skip this call if buffer ready is set and CONFIG_REQUIRE_INITIALIZED is true
//
status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu0, pKernelNvlink0);
if (status != NV_OK)
{
NV_ASSERT(status == NV_OK);
return status;
}
bBufferReady = ((pKernelNvlink0->initializedLinks & pKernelNvlink0->peerLinkMasks[peerId]) != 0) ? NV_TRUE : NV_FALSE;
if (!pKernelNvlink0->getProperty(pKernelNvlink0, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK) ||
!bBufferReady)
{
status = knvlinkUpdateCurrentConfig(pGpu0, pKernelNvlink0);
}
status = knvlinkUpdateCurrentConfig(pGpu0, pKernelNvlink0);
}
return status;

View File

@ -3756,7 +3756,6 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
Memory *pMemory;
ContextDma *pContextDma;
NvU32 addressSpace;
NvU64 notificationBufferSize;
NV_STATUS status;
hNotifier = pKernelChannel->hErrorContext;
@ -3765,8 +3764,6 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
NV_CHECK_OR_RETURN(LEVEL_INFO, index != NV_CHANNELGPFIFO_NOTIFICATION_TYPE_ERROR,
NV_ERR_INVALID_ARGUMENT);
notificationBufferSize = (index + 1) * sizeof(NvNotification);
status = deviceGetByInstance(pClient, gpuGetDeviceInstance(pGpu), &pDevice);
if (status != NV_OK)
return NV_ERR_INVALID_DEVICE;
@ -3775,7 +3772,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
{
addressSpace = memdescGetAddressSpace(pMemory->pMemDesc);
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= notificationBufferSize,
NV_CHECK_OR_RETURN(LEVEL_INFO, pMemory->Length >= ((index + 1) * sizeof(NvNotification)),
NV_ERR_OUT_OF_RANGE);
switch (addressSpace)
{
@ -3793,7 +3790,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
&pDmaMappingInfo),
NV_ERR_GENERIC);
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= notificationBufferSize,
NV_CHECK_OR_RETURN(LEVEL_INFO, pDmaMappingInfo->pMemDesc->Size >= ((index + 1) * sizeof(NvNotification)),
NV_ERR_OUT_OF_RANGE);
break;
}
@ -3808,7 +3805,7 @@ kchannelUpdateWorkSubmitTokenNotifIndex_IMPL
}
else if (NV_OK == ctxdmaGetByHandle(pClient, hNotifier, &pContextDma))
{
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (notificationBufferSize - 1),
NV_CHECK_OR_RETURN(LEVEL_INFO, pContextDma->Limit >= (((index + 1) * sizeof(NvNotification)) - 1),
NV_ERR_OUT_OF_RANGE);
}
else

View File

@ -1927,7 +1927,6 @@ memmgrFillComprInfo_IMPL
{
const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
NvU32 size;
portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
@ -1938,12 +1937,10 @@ memmgrFillComprInfo_IMPL
NV_ASSERT(compTagStartOffset != ~(NvU32)0);
size = pageSize * pageCount;
pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
pComprInfo->compTagLineMin = compTagStartOffset;
pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + size - 1) >> pComprInfo->compPageShift);
pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
pComprInfo->compTagLineMultiplier = 1;
return NV_OK;

View File

@ -132,8 +132,6 @@ knvlinkRemoveMapping_GA100
)
{
NV_STATUS status = NV_OK;
NvU32 peerId;
NvBool bBufferReady = NV_FALSE;
NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_PARAMS params;
portMemSet(&params, 0, sizeof(params));
@ -163,35 +161,7 @@ knvlinkRemoveMapping_GA100
// the MUX registers and the connection config registers. So, we have
// to call nvlinkCurrentConfig instead of nvlinkUpdateHshubConfigRegs
//
status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
if (status != NV_OK)
{
NV_ASSERT(status == NV_OK);
return status;
}
if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK))
{
FOR_EACH_INDEX_IN_MASK(32, peerId, peerMask)
{
if (pKernelNvlink->initializedLinks & pKernelNvlink->peerLinkMasks[peerId])
{
bBufferReady = NV_TRUE;
break;
}
} FOR_EACH_INDEX_IN_MASK_END;
if (!bBufferReady)
{
status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
}
}
else
{
status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
}
return status;
return knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
}
/*!

View File

@ -248,49 +248,3 @@ knvlinkDiscoverPostRxDetLinks_GH100
return status;
}
/*!
 * @brief Check if NVLink floorsweeping is needed for this particular chip.
 *
 * @param[in] pGpu                     OBJGPU pointer
 * @param[in] pKernelNvlink            KernelNvlink pointer
 * @param[in] numActiveLinksPerIoctrl  Number of links allowed to stay active per IOCTRL
 * @param[in] numLinksPerIoctrl        Total number of links per IOCTRL found in discovery
 *
 * @returns NV_TRUE  if the GPU should be floorswept down,
 *          NV_FALSE otherwise.
 */
NvBool
knvlinkIsFloorSweepingNeeded_GH100
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         numActiveLinksPerIoctrl,
    NvU32         numLinksPerIoctrl
)
{
    //
    // Only floorsweep down the given GPU if the following conditions are met:
    // 1. if the number of links for the IP is > 0
    //
    // 2. The number of active links allowed for the IOCTRL is less than the
    //    total number of links for the IOCTRL. No reason to spend time in code
    //    if the execution of it will be a NOP
    //
    // 3. If the GPU has never been floorswept. An optimization to make sure RM
    //    doesn't burn cycles repeatedly running code that will be a NOP
    //
    // 4. (temporary) Run only on Silicon chips. Fmodel currently doesn't support
    //    this feature
    //
    if ((numLinksPerIoctrl > 0 && numActiveLinksPerIoctrl > 0) &&
        numActiveLinksPerIoctrl < numLinksPerIoctrl &&
        !pKernelNvlink->bFloorSwept &&
        IS_SILICON(pGpu))
    {
        return NV_TRUE;
    }

    return NV_FALSE;
}

View File

@ -349,11 +349,7 @@ knvlinkGetP2pConnectionStatus_IMPL
}
// Get the remote ends of the links of local GPU from the nvlink core
status = knvlinkCoreGetRemoteDeviceInfo(pGpu0, pKernelNvlink0);
if (status != NV_OK)
{
return status;
}
knvlinkCoreGetRemoteDeviceInfo(pGpu0, pKernelNvlink0);
// Post topology link enable on links of local GPU
status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0,
@ -369,11 +365,7 @@ knvlinkGetP2pConnectionStatus_IMPL
if (knvlinkGetNumLinksToPeer(pGpu1, pKernelNvlink1, pGpu0) != numPeerLinks)
{
// Get the remote ends of the links of remote GPU from the nvlink core
status = knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
if (status != NV_OK)
{
return status;
}
knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
// Post topology link enable on links of remote GPU
status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1,
@ -492,12 +484,12 @@ knvlinkUpdateCurrentConfig_IMPL
{
pKCe = GPU_GET_KCE(pGpu, i);
if (pKCe)
{
status = kceTopLevelPceLceMappingsUpdate(pGpu, pKCe);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failed to update PCE-LCE mappings\n");
}
status = kceTopLevelPceLceMappingsUpdate(pGpu, pKCe);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failed to update PCE-LCE mappings\n");
}
break;
}
}
@ -815,8 +807,8 @@ knvlinkPrepareForXVEReset_IMPL
// Remove all NVLink mappings in HSHUB config registers to init values
if (!API_GPU_IN_RESET_SANITY_CHECK(pGpu) && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST))
status = knvlinkRemoveMapping_HAL(pGpu, pKernelNvlink, NV_TRUE, ((1 << NVLINK_MAX_PEERS_SW) - 1),
NV_FALSE /* bL2Entry */);
status = knvlinkRemoveMapping_HAL(pGpu, pKernelNvlink, NV_TRUE, ((1 << NVLINK_MAX_PEERS_SW) - 1),
NV_FALSE /* bL2Entry */);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,
@ -1217,7 +1209,7 @@ knvlinkUpdateLinkConnectionStatus_IMPL
#if defined(INCLUDE_NVLINK_LIB)
params.bConnected = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected;
params.bConnected = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected;
params.remoteDeviceType = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType;
params.remoteLinkNumber = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

View File

@ -241,7 +241,6 @@ subdeviceCtrlCmdBusGetNvlinkStatus_IMPL
NvU32 r = 0;
NvBool bPeerLink, bSysmemLink, bSwitchLink;
NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_INFO_PARAMS nvlinkLinkAndClockInfoParams;
NvBool bIsNvlinkReady = NV_TRUE;
//
// vGPU:
@ -288,28 +287,9 @@ subdeviceCtrlCmdBusGetNvlinkStatus_IMPL
{
// Get the nvlink connections for this device from the core
knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink);
//
// Get the nvlink connections for this device from the core
// If the function fails then the corelib doesn't have enough
// info to validate connectivity so we should mark the API call
// as not ready
//
status = knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink);
if (status == NV_ERR_NOT_READY)
{
NV_PRINTF(LEVEL_INFO, "Nvlink is not ready yet!\n");
bIsNvlinkReady = NV_FALSE;
}
else if (status != NV_OK)
{
return status;
}
}
// If nvlink is not ready don't report back any links as being enabled
pParams->enabledLinkMask = (bIsNvlinkReady) ? pKernelNvlink->enabledLinks : 0x0;
pParams->enabledLinkMask = pKernelNvlink->enabledLinks;
r = pParams->enabledLinkMask;
while (r >>= 1 ) i++;

View File

@ -75,13 +75,16 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
#if defined(INCLUDE_NVLINK_LIB)
OBJSYS *pSys = SYS_GET_INSTANCE();
NvU32 flags = NVLINK_STATE_CHANGE_SYNC;
NvBool bNvswitchProxyPresent = NV_FALSE;
NvBool bUpdateConnStatus = NV_FALSE;
NvBool bCheckDegradedMode = NV_FALSE;
OBJSYS *pSys = SYS_GET_INSTANCE();
NvU32 flags = NVLINK_STATE_CHANGE_SYNC;
NvBool bNvswitchProxyPresent = NV_FALSE;
NvBool bUpdateConnStatus = NV_FALSE;
NvBool bCheckDegradedMode = NV_FALSE;
NvU32 linkId;
NvU32 tmpDisabledLinkMask = 0;
NvU32 tmpEnabledLinkMask = 0;
nvlink_conn_info conn_info;
NvU32 linkId;
NvU32 numActiveLinksPerIoctrl = 0;
NvU32 numLinksPerIoctrl = 0;
@ -91,6 +94,18 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
//
if (!knvlinkPoweredUpForD3_HAL(pGpu, pKernelNvlink))
{
if (pKernelNvlink->bEnableAli)
{
// Update the post Rx Det link Mask for the GPU
knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
}
if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40)
{
numActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
}
//
// Optimization: Check for nvlink proxy only when system fabric is externally
// managed. This would avoid RPCs in non-nvswitch cases.
@ -100,26 +115,115 @@ knvlinkCoreGetRemoteDeviceInfo_IMPL
bNvswitchProxyPresent = knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink);
}
if (pKernelNvlink->bEnableAli)
{
// Update the post Rx Det link Mask for the GPU
knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
}
//
// If on Nvlink4.0+ then before topology discovery is performed then
// first enter the corelib floorsweeping function to floorsweep down
// the GPU if requested.
// This path does not cache connection info since its main purpose is to
// edit the connection information before RM tries to cache and update itself
//
if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40 &&
!bNvswitchProxyPresent &&
!pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED) &&
pKernelNvlink->pNvlinkDev != NULL)
pKernelNvlink->pNvlinkDev != NULL &&
!pKernelNvlink->bFloorSwept &&
IS_SILICON(pGpu) &&
numActiveLinksPerIoctrl < numLinksPerIoctrl)
{
numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
status = knvlinkFloorSweep(pGpu, pKernelNvlink,
numLinksPerIoctrl, &numActiveLinksPerIoctrl);
// NOTE: this path is discovery-only — it intentionally does not cache the connection info
FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
{
nvlink_lib_discover_and_get_remote_conn_info(
pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
}
FOR_EACH_INDEX_IN_MASK_END;
// floorsweeping in corelib will update connection info that RM will query below
(void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);
//
// If a link in the enabledLinkMask is not trained after floorsweeping,
// add it to a tmp disabled linkMask
//
// Get the link train status for the enabled link masks
NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;
portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
linkTrainedParams.linkMask = pKernelNvlink->enabledLinks;
linkTrainedParams.bActiveOnly = NV_TRUE;
// Reset timeout to clear any accumulated timeouts from link init
if (IS_GSP_CLIENT(pGpu))
{
threadStateResetTimeout(pGpu);
}
status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
(void *)&linkTrainedParams,
sizeof(linkTrainedParams));
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Failed to floorsweep valid nvlink config!\n");
return NV_ERR_NOT_READY;
NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
return status;
}
//
// Create a temporary mask of all links that are now enabled:
// classified as a link in active
//
FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
{
if (linkTrainedParams.bIsLinkActive[linkId])
{
tmpEnabledLinkMask |= BIT(linkId);
}
else
{
tmpDisabledLinkMask |= BIT(linkId);
}
}
FOR_EACH_INDEX_IN_MASK_END;
// Redo linkMasks based on the search above being the ground truth
pKernelNvlink->enabledLinks = tmpEnabledLinkMask;
//
// remove any links not in active in the tmpEnabledLinkMask from all
// other link masks as these have been floorswept by the corelib
//
pKernelNvlink->disconnectedLinkMask = tmpEnabledLinkMask;
pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;
status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
if (status != NV_OK)
{
NV_ASSERT(status == NV_OK);
return status;
}
// Re-sync the link masks with GSP
status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
if (status != NV_OK)
{
NV_ASSERT(status == NV_OK);
return status;
}
NV_PRINTF(LEVEL_INFO,
"Post Floorsweeping: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n",
pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);
pKernelNvlink->bFloorSwept = NV_TRUE;
//
// Assert that the number of links in active is always less than
// or equal to the number of active links on the chips
//
NV_ASSERT_OR_ELSE_STR((nvPopCount32(tmpEnabledLinkMask) <= numActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)),
"Mismatch between links in active and #of links supported!\n", return NV_ERR_INVALID_STATE);
}
// We only need to look at links that are still considered disconnected
@ -438,7 +542,7 @@ knvlinkCheckTrainingIsComplete_IMPL
{
if (pKernelNvlink0->bLinkTrainingDebugSpew)
{
NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
}
return NV_ERR_GENERIC;
}
@ -495,7 +599,7 @@ knvlinkCheckTrainingIsComplete_IMPL
{
if (pKernelNvlink1->bLinkTrainingDebugSpew)
{
NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
}
return NV_ERR_GENERIC;
@ -1279,140 +1383,6 @@ knvlinkRetrainLink_IMPL
return status;
}
/*!
 * @brief Floorsweep the nvlink config for the chip
 *
 * @param[in]  pGpu                      OBJGPU pointer
 * @param[in]  pKernelNvlink             KernelNvlink pointer
 * @param[in]  numLinksPerIoctrl         number of total links per IOCTRL found in discovery
 * @param[out] pNumActiveLinksPerIoctrl  number of links needed to be active per IOCTRL
 *
 * @returns NV_OK on success (including when no floorsweeping is needed).
 *          NV_ERR_XXX on failure.
 */
NV_STATUS
knvlinkFloorSweep_IMPL
(
    OBJGPU *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32 numLinksPerIoctrl,
    NvU32 *pNumActiveLinksPerIoctrl
)
{
#if defined(INCLUDE_NVLINK_LIB)
    NV_STATUS status = NV_OK;
    NvU32 linkId;
    NvU32 tmpDisabledLinkMask = 0;
    NvU32 tmpEnabledLinkMask = 0;
    nvlink_conn_info conn_info;

    *pNumActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
    // Bail out early (NV_OK) when the HAL decides no floorsweeping is required
    if (!knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, *pNumActiveLinksPerIoctrl, numLinksPerIoctrl))
    {
        return NV_OK;
    }

    // Discovery-only path: intentionally does not cache the connection info
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        nvlink_lib_discover_and_get_remote_conn_info(
            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // floorsweeping in corelib will update connection info that RM will query below
    (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);

    //
    // If a link in the enabledLinkMask is not trained after floorsweeping,
    // add it to a tmp disabled linkMask
    //

    // Get the link train status for the enabled link masks
    NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask = pKernelNvlink->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu))
    {
        threadStateResetTimeout(pGpu);
    }

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    //
    // Create a temporary mask of all links that are now enabled:
    // classified as a link in active
    //
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        if (linkTrainedParams.bIsLinkActive[linkId])
        {
            tmpEnabledLinkMask |= BIT(linkId);
        }
        else
        {
            tmpDisabledLinkMask |= BIT(linkId);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Redo linkMasks based on the search above being the ground truth
    pKernelNvlink->enabledLinks = tmpEnabledLinkMask;

    //
    // remove any links not in active in the tmpEnabledLinkMask from all
    // other link masks as these have been floorswept by the corelib
    //
    pKernelNvlink->disconnectedLinkMask = tmpEnabledLinkMask;
    pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;

    status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    // Re-sync the link masks with GSP
    status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    //
    // Assert that the number of links in active is always less than
    // or equal to the number of active links on the chips
    //
    if(!(nvPopCount32(tmpEnabledLinkMask) <= *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Floorsweeping didn't work! enabledMaskCount: 0x%x and numActiveLinksTotal: 0x%x. Current link info cached in SW: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n",
                  nvPopCount32(tmpEnabledLinkMask), *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask), pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);
        return NV_ERR_NOT_READY;
    }

    pKernelNvlink->bFloorSwept = NV_TRUE;
#endif //INCLUDE_NVLINK_LIB
    return NV_OK;
}
/*!
* @brief Retrain the link from OFF state
*

View File

@ -758,8 +758,6 @@ NvBool gpumgrIsDeviceRmFirmwareCapable
0x2236, // A10 SKU215 Pris-24
0x2237, // A10G SKU215 Pris-24
0x25B6, // A16
0x20F5, // A800-80
0x20F6, // A800-40
};
NvU32 count = NV_ARRAY_ELEMENTS(defaultGspRmGpus);
NvU32 i;

View File

@ -1,4 +1,4 @@
NVIDIA_VERSION = 520.61.05
NVIDIA_VERSION = 520.56.06
# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))