550.54.14

This commit is contained in:
parent 91676d6628
commit 476bd34534
@@ -2,6 +2,12 @@

## Release 550 Entries

### [550.54.14] 2024-02-23

#### Added

- Added vGPU Host and vGPU Guest support. For vGPU Host, please refer to the README.vgpu packaged in the vGPU Host Package for more details.

### [550.40.07] 2024-01-24

#### Fixed
README.md (21 lines changed)
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.40.07.
version 550.54.14.

## How to Build
@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.40.07 driver release. This can be achieved by installing
550.54.14 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@@ -188,7 +188,10 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.07/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.54.14/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -650,7 +653,9 @@ Subsystem Device ID.
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400E | 1FF2 10DE 18FF |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
| NVIDIA T400E | 1FF2 17AA 18FF |
| Quadro T1000 | 1FF9 |
| NVIDIA A100-SXM4-40GB | 20B0 |
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
@@ -746,12 +751,15 @@ Subsystem Device ID.
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
@@ -805,6 +813,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
@@ -846,6 +855,7 @@ Subsystem Device ID.
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
@@ -854,6 +864,7 @@ Subsystem Device ID.
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
@@ -891,6 +902,10 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
    (((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)

/*
 * For console setup by EFI GOP, the base address is BAR1.
 * For console setup by VBIOS, the base address is BAR2 + 16MB.
 */
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
    (((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
     ((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))

#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
    ((nv)->iommus.iso_iommu_present)
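The NV_IS_CONSOLE_MAPPED() macro added above lets teardown code recognize a physical address that is currently backing the system console (BAR1 for an EFI GOP console, BAR2 + 16MB for a VBIOS console). A minimal sketch of how such a check might be used, assuming a hypothetical helper that is not part of this patch:

```c
// Hypothetical illustration, not from this patch: refuse to release a BAR
// mapping that currently backs the framebuffer console.
static NvBool example_can_unmap(nv_state_t *nv, NvU64 cpu_address)
{
    if (NV_IS_CONSOLE_MAPPED(nv, cpu_address))
        return NV_FALSE;  // address backs the console; keep the mapping

    return NV_TRUE;       // safe to proceed with the normal unmap path
}
```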
@@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);

void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);

struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;
@@ -956,12 +956,20 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
    - This function should not be called when interrupts are disabled.

    Arguments:
        device[IN]        - Device handle associated with the gpu
        pFaultInfo[IN]    - information provided by RM for fault handling.
                            used for obtaining the device handle without locks.
        bCopyAndFlush[IN] - Instructs RM to perform the flush in the Copy+Flush mode.
                            In this mode, RM will perform a copy of the packets from
                            the HW buffer to UVM's SW buffer as part of performing
                            the flush. This mode gives UVM the opportunity to observe
                            the packets contained within the HW buffer at the time
                            of issuing the call.

    Error codes:
      NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
                                                   NvBool bCopyAndFlush);
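The prototype change above replaces the device handle with the UvmGpuFaultInfo pointer and adds a copy-and-flush flag. A hedged sketch of the new calling convention (the wrapper and its names are illustrative, not taken from this patch):

```c
// Assumed usage sketch: pass the fault info provided by RM, and let NV_TRUE
// request the Copy+Flush mode described in the comment block above.
static NV_STATUS example_flush(UvmGpuFaultInfo *fault_info)
{
    return nvUvmInterfaceFlushReplayableFaultBuffer(fault_info, NV_TRUE);
}
```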
/*******************************************************************************
    nvUvmInterfaceTogglePrefetchFaults
@@ -982,7 +990,8 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
    Error codes:
      NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
                                             NvBool bEnable);

/*******************************************************************************
    nvUvmInterfaceInitAccessCntrInfo
@@ -700,8 +700,10 @@ typedef struct UvmGpuInfo_tag
    // local EGM properties
    // NV_TRUE if EGM is enabled
    NvBool egmEnabled;

    // Peer ID to reach local EGM when EGM is enabled
    NvU8 egmPeerId;

    // EGM base address to offset in the GMMU PTE entry for EGM mappings
    NvU64 egmBaseAddr;
} UvmGpuInfo;
@@ -712,9 +714,10 @@ typedef struct UvmGpuFbInfo_tag
    // RM regions that are not registered with PMA either.
    NvU64 maxAllocatableAddress;

    NvU32 heapSize;          // RAM in KB available for user allocations
    NvU32 reservedHeapSize;  // RAM in KB reserved for internal RM allocation
    NvBool bZeroFb;          // Zero FB mode enabled.
    NvU32 heapSize;          // RAM in KB available for user allocations
    NvU32 reservedHeapSize;  // RAM in KB reserved for internal RM allocation
    NvBool bZeroFb;          // Zero FB mode enabled.
    NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
} UvmGpuFbInfo;

typedef struct UvmGpuEccInfo_tag
@@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);

enum os_pci_req_atomics_type {
    OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;

/*
 * ---------------------------------------------------------------------------
@@ -75,7 +75,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
@@ -96,5 +96,6 @@ NV_HEADER_PRESENCE_TESTS = \
  soc/tegra/bpmp.h \
  linux/sync_file.h \
  linux/cc_platform.h \
  asm/cpufeature.h
  asm/cpufeature.h \
  linux/mpi.h
@@ -58,7 +58,7 @@
#ifndef _UVM_H_
#define _UVM_H_

#define UVM_API_LATEST_REVISION 9
#define UVM_API_LATEST_REVISION 11

#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@@ -297,7 +297,9 @@ NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
//
// Arguments:
//     gpuUuid: (INPUT)
//         UUID of the GPU for which pageable memory access support is queried.
//         UUID of the physical GPU if the GPU is not SMC capable or SMC
//         enabled, or the GPU instance UUID of the partition for which
//         pageable memory access support is queried.
//
//     pageableMemAccess: (OUTPUT)
//         Returns true (non-zero) if the GPU represented by gpuUuid supports
@@ -327,6 +329,12 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
// process results in an error.
//
// After successfully registering a GPU partition, all subsequent API calls
// which take a NvProcessorUuid argument (including UvmGpuMappingAttributes),
// must use the GI partition UUID which can be obtained with
// NvRmControl(NVC637_CTRL_CMD_GET_UUID). Otherwise, if the GPU is not SMC
// capable or SMC enabled, the physical GPU UUID must be used.
//
// Arguments:
//     gpuUuid: (INPUT)
//         UUID of the physical GPU to register.
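The added wording above means that, once an SMC partition has been registered, later UUID-keyed UVM calls must be made with the GI partition UUID rather than the physical GPU UUID. A small sketch of that selection rule, with hypothetical parameters and with the NVC637_CTRL_CMD_GET_UUID query elided because its interface is not part of this header:

```c
// Hedged sketch, not from this patch: choose the UUID that subsequent UVM
// calls (for example UvmRegisterGpuVaSpace or UvmUnregisterGpu) must use.
static const NvProcessorUuid *uuid_for_uvm_calls(NvBool smc_enabled,
                                                 const NvProcessorUuid *gi_partition_uuid,
                                                 const NvProcessorUuid *physical_gpu_uuid)
{
    // The GI partition UUID comes from NvRmControl(NVC637_CTRL_CMD_GET_UUID);
    // obtaining it is elided here.
    return smc_enabled ? gi_partition_uuid : physical_gpu_uuid;
}
```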
@ -431,7 +439,8 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unregister.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to unregister.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@ -489,7 +498,8 @@ NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to register.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// On Linux: RM ctrl fd, hClient and hVaSpace.
|
||||
@ -560,7 +570,9 @@ NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU whose VA space should be unregistered.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition whose VA space
|
||||
// should be unregistered.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@ -590,7 +602,7 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// The two GPUs must be connected via PCIe. An error is returned if the GPUs are
|
||||
// not connected or are connected over an interconnect different than PCIe
|
||||
// (NVLink, for example).
|
||||
// (NVLink or SMC partitions, for example).
|
||||
//
|
||||
// If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
|
||||
// must support the same set of page sizes for GPU mappings.
|
||||
@ -603,10 +615,12 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidA: (INPUT)
|
||||
// UUID of GPU A.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition A.
|
||||
//
|
||||
// gpuUuidB: (INPUT)
|
||||
// UUID of GPU B.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition B.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NO_MEMORY:
|
||||
@ -652,10 +666,12 @@ NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidA: (INPUT)
|
||||
// UUID of GPU A.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition A.
|
||||
//
|
||||
// gpuUuidB: (INPUT)
|
||||
// UUID of GPU B.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition B.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@ -700,7 +716,9 @@ NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU that the channel is associated with.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition that the channel is
|
||||
// associated with.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// On Linux: RM ctrl fd, hClient and hChannel.
|
||||
@ -1139,11 +1157,14 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location for this VA range.
|
||||
// UUID of the CPU, UUID of the physical GPU if the GPU is not SMC
|
||||
// capable or SMC enabled, or the GPU instance UUID of the partition of
|
||||
// the preferred location for this VA range.
|
||||
//
|
||||
// accessedByUuids: (INPUT)
|
||||
// UUIDs of all processors that should have persistent mappings to this
|
||||
// VA range.
|
||||
// UUID of the CPU, UUID of the physical GPUs if the GPUs are not SMC
|
||||
// capable or SMC enabled, or the GPU instance UUID of the partitions
|
||||
// that should have persistent mappings to this VA range.
|
||||
//
|
||||
// accessedByCount: (INPUT)
|
||||
// Number of elements in the accessedByUuids array.
|
||||
@ -1421,7 +1442,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
@ -1499,7 +1522,9 @@ NV_STATUS UvmMigrate(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if the destination processor is
|
||||
@ -1576,7 +1601,9 @@ NV_STATUS UvmMigrateAsync(void *base,
|
||||
// Id of the range group whose associated VA ranges have to be migrated.
|
||||
//
|
||||
// destinationUuid: (INPUT)
|
||||
// UUID of the destination processor to migrate pages to.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
|
||||
// migrate pages to.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_OBJECT_NOT_FOUND:
|
||||
@ -1938,7 +1965,9 @@ NV_STATUS UvmMapExternalAllocation(void *base,
|
||||
//
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to map the sparse region on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to map the sparse
|
||||
// region on.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -1995,7 +2024,9 @@ NV_STATUS UvmMapExternalSparse(void *base,
|
||||
// The length of the virtual address range.
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to unmap the VA range from.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to unmap the VA
|
||||
// range from.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -2062,7 +2093,9 @@ NV_STATUS UvmUnmapExternalAllocation(void *base,
|
||||
// supported by the GPU.
|
||||
//
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to map the dynamic parallelism region on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to map the
|
||||
// dynamic parallelism region on.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_UVM_ADDRESS_IN_USE:
|
||||
@ -2293,7 +2326,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// preferredLocationUuid: (INPUT)
|
||||
// UUID of the preferred location.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// preferred location.
|
||||
//
|
||||
// preferredCpuMemoryNode: (INPUT)
|
||||
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
|
||||
@ -2469,8 +2504,9 @@ NV_STATUS UvmUnsetPreferredLocation(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// accessedByUuid: (INPUT)
|
||||
// UUID of the processor that should have pages in the the VA range
|
||||
// mapped when possible.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// that should have pages in the VA range mapped when possible.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -2538,8 +2574,10 @@ NV_STATUS UvmSetAccessedBy(void *base,
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// accessedByUuid: (INPUT)
|
||||
// UUID of the processor from which any policies set by
|
||||
// UvmSetAccessedBy should be revoked for the given VA range.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID
|
||||
// from which any policies set by UvmSetAccessedBy should be revoked
|
||||
// for the given VA range.
|
||||
//
|
||||
// Errors:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
@ -2597,7 +2635,9 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to enable software-assisted system-wide atomics on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to enable
|
||||
// software-assisted system-wide atomics on.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_NO_MEMORY:
|
||||
@ -2633,7 +2673,9 @@ NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the GPU to disable software-assisted system-wide atomics on.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition to disable
|
||||
// software-assisted system-wide atomics on.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
@ -2862,7 +2904,9 @@ NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
|
||||
// Name of the counter in that scope.
|
||||
//
|
||||
// gpu: (INPUT)
|
||||
// Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
|
||||
// This parameter is ignored in AllGpu scopes.
|
||||
//
|
||||
// pCounterHandle: (OUTPUT)
|
||||
// Handle to the counter address.
|
||||
@ -2916,7 +2960,7 @@ NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
|
||||
// UvmEventQueueCreate
|
||||
//
|
||||
// This call creates an event queue of the given size.
|
||||
// No events are added in the queue till they are enabled by the user.
|
||||
// No events are added in the queue until they are enabled by the user.
|
||||
// Event queue data is visible to the user even after the target process dies
|
||||
// if the session is active and queue is not freed.
|
||||
//
|
||||
@ -2967,7 +3011,7 @@ NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
|
||||
// UvmEventQueueDestroy
|
||||
//
|
||||
// This call frees all interal resources associated with the queue, including
|
||||
// upinning of the memory associated with that queue. Freeing user buffer is
|
||||
// unpinning of the memory associated with that queue. Freeing user buffer is
|
||||
// responsibility of a caller. Event queue might be also destroyed as a side
|
||||
// effect of destroying a session associated with this queue.
|
||||
//
|
||||
@ -3151,9 +3195,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
|
||||
// UvmEventGetGpuUuidTable
|
||||
//
|
||||
// Each migration event entry contains the gpu index to/from where data is
|
||||
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
|
||||
// Using indices saves on the size of each event entry. This API provides the
|
||||
// gpuIndex to gpuUuid relation to the user.
|
||||
// migrated. This index maps to a corresponding physical gpu UUID in the
|
||||
// gpuUuidTable. Using indices saves on the size of each event entry. This API
|
||||
// provides the gpuIndex to gpuUuid relation to the user.
|
||||
//
|
||||
// This API does not access the queue state maintained in the user
|
||||
// library and so the user doesn't need to acquire a lock to protect the
|
||||
@ -3161,9 +3205,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuidTable: (OUTPUT)
|
||||
// The return value is an array of UUIDs. The array index is the
|
||||
// corresponding gpuIndex. There can be at max 32 gpus associated with
|
||||
// UVM, so array size is 32.
|
||||
// The return value is an array of physical GPU UUIDs. The array index
|
||||
// is the corresponding gpuIndex. There can be at max 32 GPUs
|
||||
// associated with UVM, so array size is 32.
|
||||
//
|
||||
// validCount: (OUTPUT)
|
||||
// The system doesn't normally contain 32 GPUs. This field gives the
|
||||
@ -3222,7 +3266,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
|
||||
UvmEventQueueHandle queueHandle,
|
||||
UvmEventEntry *pBuffer,
|
||||
UvmEventEntry_V1 *pBuffer,
|
||||
NvU64 *nEntries);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -3418,10 +3462,15 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
|
||||
//
|
||||
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
|
||||
// sizeof(UvmToolsEventControlData_V2).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
// sizeof(UvmEventEntry_V2).
|
||||
NvLength UvmToolsGetEventEntrySize(void);
|
||||
#endif
|
||||
|
||||
NvLength UvmToolsGetNumberOfCounters(void);
|
||||
|
||||
@ -3436,6 +3485,12 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
// UvmToolsEventControlData_V2::version records the entry version that
|
||||
// will be generated.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
@ -3447,10 +3502,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). One could call
|
||||
// UvmToolsGetEventControlSize() function to find out current size of
|
||||
// UvmToolsEventControlData. Gets pinned until queue is destroyed.
|
||||
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
|
||||
// UvmToolsEventControlData_V2 (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
// Handle to the created queue.
|
||||
@ -3460,22 +3514,32 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES:
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate a queue of requested size. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory). Also it could not
|
||||
// have been possible to create UvmToolsEventQueueDescriptor.
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate a queue of requested size. Another
|
||||
// would be either event_buffer or event_control memory couldn't be
|
||||
// pinned (e.g. because of OS limitation of pinnable memory). Also it
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
                                   void *event_buffer,
                                   NvLength event_buffer_size,
                                   void *event_control,
                                   UvmToolsEventQueueHandle *queue);
#else
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
                                   UvmToolsEventQueueVersion version,
                                   void *event_buffer,
                                   NvLength event_buffer_size,
                                   void *event_control,
                                   UvmToolsEventQueueHandle *queue);
#endif

UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
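With UVM_API_LATEST_REVISION raised to 11, queue creation takes an explicit UvmToolsEventQueueVersion. A hedged sketch of a caller that compiles against either revision, mirroring the header's own guard (the helper is hypothetical and assumes the version type is visible on both paths):

```c
static NV_STATUS example_create_tools_queue(UvmToolsSessionHandle session,
                                            UvmToolsEventQueueVersion version,
                                            void *event_buffer,
                                            NvLength event_buffer_size,
                                            void *event_control,
                                            UvmToolsEventQueueHandle *queue)
{
#if UVM_API_REV_IS_AT_MOST(10)
    (void)version;  // older revisions imply the legacy entry layout
    return UvmToolsCreateEventQueue(session, event_buffer, event_buffer_size,
                                    event_control, queue);
#else
    // Revision 11 callers choose UvmEventEntry_V1 or UvmEventEntry_V2 here.
    return UvmToolsCreateEventQueue(session, version, event_buffer,
                                    event_buffer_size, event_control, queue);
#endif
}
```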
@ -3512,7 +3576,7 @@ NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsDestroyEventQueue
|
||||
//
|
||||
// Destroys all internal resources associated with the queue. It unpinns the
|
||||
// Destroys all internal resources associated with the queue. It unpins the
|
||||
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
|
||||
// destroyed when corresponding session gets destroyed.
|
||||
//
|
||||
@ -3534,7 +3598,7 @@ NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
|
||||
// UvmEventQueueEnableEvents
|
||||
//
|
||||
// This call enables a particular event type in the event queue. All events are
|
||||
// disabled by default. Any event type is considered listed if and only if it's
|
||||
// disabled by default. Any event type is considered listed if and only if its
|
||||
// corresponding value is equal to 1 (in other words, bit is set). Disabled
|
||||
// events listed in eventTypeFlags are going to be enabled. Enabled events and
|
||||
// events not listed in eventTypeFlags are not affected by this call.
|
||||
@ -3567,7 +3631,7 @@ NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
|
||||
// UvmToolsEventQueueDisableEvents
|
||||
//
|
||||
// This call disables a particular event type in the event queue. Any event type
|
||||
// is considered listed if and only if it's corresponding value is equal to 1
|
||||
// is considered listed if and only if its corresponding value is equal to 1
|
||||
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
|
||||
// going to be disabled. Disabled events and events not listed in eventTypeFlags
|
||||
// are not affected by this call.
|
||||
@ -3605,7 +3669,7 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
|
||||
//
|
||||
// Counters position follows the layout of the memory that UVM driver decides to
|
||||
// use. To obtain particular counter value, user should perform consecutive
|
||||
// atomic reads at a a given buffer + offset address.
|
||||
// atomic reads at a given buffer + offset address.
|
||||
//
|
||||
// It is not defined what is the initial value of a counter. User should rely on
|
||||
// a difference between each snapshot.
|
||||
@ -3628,9 +3692,9 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
|
||||
// Provided session is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate counters structure. Another would be that
|
||||
// either event_buffer or event_control memory couldn't be pinned
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate counters structure. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory)
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
@ -3641,12 +3705,12 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateProcessorCounters
|
||||
//
|
||||
// Creates the counters structure for tracking per-process counters.
|
||||
// Creates the counters structure for tracking per-processor counters.
|
||||
// These counters are disabled by default.
|
||||
//
|
||||
// Counters position follows the layout of the memory that UVM driver decides to
|
||||
// use. To obtain particular counter value, user should perform consecutive
|
||||
// atomic reads at a a given buffer + offset address.
|
||||
// atomic reads at a given buffer + offset address.
|
||||
//
|
||||
// It is not defined what is the initial value of a counter. User should rely on
|
||||
// a difference between each snapshot.
|
||||
@ -3662,7 +3726,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
// counters are destroyed.
|
||||
//
|
||||
// processorUuid: (INPUT)
|
||||
// UUID of the resource, for which counters will provide statistic data.
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, the GPU instance UUID of the partition, or the CPU UUID of
|
||||
// the resource, for which counters will provide statistic data.
|
||||
//
|
||||
// counters: (OUTPUT)
|
||||
// Handle to the created counters.
|
||||
@ -3672,9 +3738,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
|
||||
// session handle does not refer to a valid tools session
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES
|
||||
// There could be multiple reasons for this error. One would be that it's
|
||||
// not possible to allocate counters structure. Another would be that
|
||||
// either event_buffer or event_control memory couldn't be pinned
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate counters structure. Another would be
|
||||
// that either event_buffer or event_control memory couldn't be pinned
|
||||
// (e.g. because of OS limitation of pinnable memory)
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT
|
||||
@ -3690,7 +3756,7 @@ NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
|
||||
// UvmToolsDestroyCounters
|
||||
//
|
||||
// Destroys all internal resources associated with this counters structure.
|
||||
// It unpinns the buffer provided in UvmToolsCreate*Counters. Counters structure
|
||||
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
|
||||
// also gest destroyed when corresponding session is destroyed.
|
||||
//
|
||||
// Arguments:
|
||||
@ -3711,7 +3777,7 @@ NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
|
||||
// UvmToolsEnableCounters
|
||||
//
|
||||
// This call enables certain counter types in the counters structure. Any
|
||||
// counter type is considered listed if and only if it's corresponding value is
|
||||
// counter type is considered listed if and only if its corresponding value is
|
||||
// equal to 1 (in other words, bit is set). Disabled counter types listed in
|
||||
// counterTypeFlags are going to be enabled. Already enabled counter types and
|
||||
// counter types not listed in counterTypeFlags are not affected by this call.
|
||||
@ -3745,7 +3811,7 @@ NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
|
||||
// UvmToolsDisableCounters
|
||||
//
|
||||
// This call disables certain counter types in the counters structure. Any
|
||||
// counter type is considered listed if and only if it's corresponding value is
|
||||
// counter type is considered listed if and only if its corresponding value is
|
||||
// equal to 1 (in other words, bit is set). Enabled counter types listed in
|
||||
// counterTypeFlags are going to be disabled. Already disabled counter types and
|
||||
// counter types not listed in counterTypeFlags are not affected by this call.
|
||||
@ -3890,32 +3956,72 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
// UvmToolsGetProcessorUuidTable
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains registered,
|
||||
// its index in the table does not change. New registrations obtain the first
|
||||
// unused index.
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
// registered, its index in the table does not change.
|
||||
// Note that the index in the table corresponds to the processor ID reported
|
||||
// in UvmEventEntry event records and that the table is not contiguously packed
|
||||
// with non-zero UUIDs even with no GPU unregistrations.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for the UUID table returned. The version must
|
||||
// match the requested version of the event queue created with
|
||||
// UvmToolsCreateEventQueue().
|
||||
// See UvmEventEntry_V1 and UvmEventEntry_V2.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The srcIndex and dstIndex fields of the
|
||||
// UvmEventMigrationInfo struct index this array. Unused indices will
|
||||
// have a UUID of zero.
|
||||
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
|
||||
// for the UUID of the physical GPU and only supports a single SMC
|
||||
// partition registered per process. Version UvmEventEntry_V2 supports
|
||||
// multiple SMC partitions registered per process and uses physical GPU
|
||||
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
|
||||
// UUIDs for SMC partitions.
|
||||
// The table pointer can be NULL in which case, the size of the table
|
||||
// needed to hold all the UUIDs is returned in 'count'.
|
||||
//
|
||||
// table_size: (INPUT)
|
||||
// The size of the table in number of array elements. This can be
|
||||
// zero if the table pointer is NULL.
|
||||
//
|
||||
// count: (OUTPUT)
|
||||
// Set by UVM to the number of UUIDs written, including any gaps in
|
||||
// the table due to unregistered GPUs.
|
||||
// On output, it is set by UVM to the number of UUIDs needed to hold
|
||||
// all the UUIDs, including any gaps in the table due to unregistered
|
||||
// GPUs.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
// writing to table failed or the count pointer was invalid.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
|
||||
// The count pointer is NULL.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
//
|
||||
// NV_WARN_MISMATCHED_TARGET:
|
||||
// The kernel returned a table suitable for UvmEventEntry_V1 events.
|
||||
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
                                        NvProcessorUuid *table,
                                        NvLength *count);
#else
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
                                        UvmToolsEventQueueVersion version,
                                        NvProcessorUuid *table,
                                        NvLength table_size,
                                        NvLength *count);
#endif
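The documentation above states that the table pointer may be NULL, in which case only the required element count is returned. A hedged sketch of the resulting query-then-fetch pattern against the revision-11 signature (helper name, buffer handling, and error policy are placeholders, not code from this patch):

```c
static NV_STATUS example_fetch_uuid_table(UvmToolsSessionHandle session,
                                          UvmToolsEventQueueVersion version,
                                          NvProcessorUuid *table,   // caller-provided storage
                                          NvLength table_size,      // elements in 'table'
                                          NvLength *needed)
{
    // First call: NULL table and zero size; 'needed' receives the element
    // count required to hold every UUID, including gaps left by
    // unregistered GPUs.
    NV_STATUS status = UvmToolsGetProcessorUuidTable(session, version, NULL, 0, needed);
    if (status != NV_OK)
        return status;

    if (*needed > table_size)
        return NV_ERR_NO_MEMORY;  // caller's buffer is too small

    // Second call: fill the caller's buffer. 'version' must match the one
    // passed to UvmToolsCreateEventQueue().
    return UvmToolsGetProcessorUuidTable(session, version, table, table_size, needed);
}
```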
//------------------------------------------------------------------------------
// UvmToolsFlushEvents
@@ -34,16 +34,6 @@

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())

// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif

typedef struct
{
    // Mask of gpu_va_spaces which are registered for ATS access. The mask is
|
@ -30,7 +30,7 @@
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
@ -246,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
@ -284,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
@ -370,6 +370,8 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#else
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
#endif
|
||||
|
||||
return status;
|
||||
@ -403,21 +405,24 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
// Residency mask needs to be computed even if prefetching is disabled since
|
||||
// the residency information is also needed by access counters servicing in
|
||||
// uvm_ats_service_access_counters()
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
@ -637,8 +642,18 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
|
||||
// is just an optimization and servicing access counter migrations is still
|
||||
// worthwhile even without any prefetching added. So, let servicing continue
|
||||
// instead of returning early even if the prefetch computation fails.
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
|
@ -318,10 +318,11 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
|
||||
unsigned i;
|
||||
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
|
||||
return *buffer = 0;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
|
||||
|
@ -151,22 +151,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID, but
|
||||
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
|
||||
// Match on GI UUID until the UVM user level API has been updated to use
|
||||
// the GI UUID.
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
|
||||
mem_info->uuid = gpu->parent->uuid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -197,11 +181,6 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, ¶ms->gpu_uuid), done);
|
||||
|
||||
TEST_CHECK_GOTO((memory_info.size == params->size), done);
|
||||
@ -309,11 +288,6 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
|
||||
// physical GPU UUID until the UVM user level has been updated to use
|
||||
// the GI UUID.
|
||||
fix_memory_info_uuid(va_space, &memory_info);
|
||||
|
||||
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
|
||||
|
||||
memset(pte_buffer, 0, sizeof(pte_buffer));
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -314,7 +314,7 @@ static NV_STATUS uvm_suspend(void)
|
||||
// interrupts in the bottom half in the future, the bottom half flush
|
||||
// below will no longer be able to guarantee that all outstanding
|
||||
// notifications have been handled.
|
||||
uvm_gpu_access_counters_set_ignore(gpu, true);
|
||||
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);
|
||||
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
|
||||
|
||||
@ -373,13 +373,13 @@ static NV_STATUS uvm_resume(void)
|
||||
|
||||
// Bring the fault buffer software state back in sync with the
|
||||
// hardware state.
|
||||
uvm_gpu_fault_buffer_resume(gpu->parent);
|
||||
uvm_parent_gpu_fault_buffer_resume(gpu->parent);
|
||||
|
||||
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
|
||||
|
||||
// Reenable access counter interrupt processing unless notifications
|
||||
// have been set to be suppressed.
|
||||
uvm_gpu_access_counters_set_ignore(gpu, false);
|
||||
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
|
||||
}
|
||||
|
||||
uvm_up_write(&g_uvm_global.pm.lock);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -59,6 +59,7 @@ MODULE_PARM_DESC(uvm_peer_copy, "Choose the addressing mode for peer copying, op
|
||||
|
||||
static void remove_gpu(uvm_gpu_t *gpu);
|
||||
static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu);
|
||||
static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu);
|
||||
static void destroy_nvlink_peers(uvm_gpu_t *gpu);
|
||||
|
||||
@ -241,6 +242,8 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
|
||||
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
|
||||
}
|
||||
|
||||
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@ -843,11 +846,11 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
if (!uvm_procfs_is_enabled())
|
||||
return NV_OK;
|
||||
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), uvm_gpu_uuid(gpu));
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->parent->uuid);
|
||||
|
||||
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
|
||||
|
||||
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
|
||||
// Create UVM-GPU-${physical-UUID}/${sub_processor_index} directory
|
||||
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
|
||||
|
||||
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
|
||||
@ -855,7 +858,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
// Create symlink from ${gpu_id} to
|
||||
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
|
||||
// UVM-GPU-${physical-UUID}/${sub_processor_index}
|
||||
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
|
||||
snprintf(gpu_dir_name,
|
||||
sizeof(gpu_dir_name),
|
||||
@ -867,6 +870,16 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
if (gpu->procfs.dir_symlink == NULL)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (gpu->parent->smc.enabled) {
|
||||
// Create symlink from UVM-GPU-${GI-UUID} to
|
||||
// UVM-GPU-${physical-UUID}/${sub_processor_index}
|
||||
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->uuid);
|
||||
|
||||
gpu->procfs.gpu_instance_uuid_symlink = proc_symlink(uuid_text_buffer, gpu_base_dir_entry, gpu_dir_name);
|
||||
if (gpu->procfs.gpu_instance_uuid_symlink == NULL)
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
}
|
||||
|
||||
// GPU peer files are debug only
|
||||
if (!uvm_procfs_is_debug_enabled())
|
||||
return NV_OK;
|
||||
@ -882,6 +895,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.gpu_instance_uuid_symlink);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
@ -1038,6 +1052,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
NvU32 num_entries;
|
||||
NvU64 va_size;
|
||||
NvU64 va_per_entry;
|
||||
uvm_mmu_page_table_alloc_t *tree_alloc;
|
||||
|
||||
status = uvm_page_tree_init(gpu,
|
||||
NULL,
|
||||
@ -1059,20 +1074,30 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
|
||||
// Make sure that RM's part of the VA is aligned to the VA covered by a
|
||||
// single top level PDE.
|
||||
UVM_ASSERT_MSG(gpu->parent->rm_va_base % va_per_entry == 0,
|
||||
"va_base 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_base, va_per_entry);
|
||||
"va_base 0x%llx va_per_entry 0x%llx\n",
|
||||
gpu->parent->rm_va_base,
|
||||
va_per_entry);
|
||||
UVM_ASSERT_MSG(gpu->parent->rm_va_size % va_per_entry == 0,
|
||||
"va_size 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_size, va_per_entry);
|
||||
"va_size 0x%llx va_per_entry 0x%llx\n",
|
||||
gpu->parent->rm_va_size,
|
||||
va_per_entry);
|
||||
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
|
||||
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
|
||||
|
||||
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
|
||||
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
|
||||
gpu_get_internal_pasid(gpu)));
|
||||
tree_alloc->addr.address,
|
||||
num_entries,
|
||||
tree_alloc->addr.aperture == UVM_APERTURE_VID,
|
||||
gpu_get_internal_pasid(gpu)));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
gpu->rm_address_space_moved_to_page_tree = true;
|
||||
|
||||
return NV_OK;
|
||||
@ -1212,6 +1237,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
size_t len;
|
||||
NV_STATUS status;
|
||||
|
||||
if (gpu->parent->smc.enabled) {
|
||||
@ -1229,6 +1256,20 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
|
||||
gpu->smc.swizz_id = gpu_info->smcSwizzId;
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->parent->uuid);
|
||||
snprintf(gpu->name,
|
||||
sizeof(gpu->name),
|
||||
"ID %u: %s",
|
||||
uvm_id_value(gpu->id),
|
||||
uuid_buffer + 4);
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->uuid);
|
||||
len = strlen(gpu->name);
|
||||
snprintf(gpu->name + len,
|
||||
sizeof(gpu->name) - len,
|
||||
" UVM-GI-%s",
|
||||
uuid_buffer + 8);
|
||||
|
||||
// Initialize the per-GPU procfs dirs as early as possible so that other
|
||||
// parts of the driver can add files in them as part of their per-GPU init.
|
||||
status = init_procfs_dirs(gpu);
|
||||
@ -1338,7 +1379,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
NV_STATUS status;
|
||||
bool alloc_parent = (parent_gpu == NULL);
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
@ -1364,13 +1404,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
if (alloc_parent)
|
||||
fill_parent_gpu_info(parent_gpu, gpu_info);
|
||||
|
||||
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
|
||||
snprintf(gpu->name,
|
||||
sizeof(gpu->name),
|
||||
"ID %u: %s",
|
||||
uvm_id_value(gpu->id),
|
||||
uuid_buffer);
|
||||
|
||||
// After this point all error clean up should be handled by remove_gpu()
|
||||
|
||||
if (!gpu_supports_uvm(parent_gpu)) {
|
||||
@ -1432,13 +1465,25 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
if (alloc_parent) {
|
||||
if (gpu->parent->smc.enabled) {
|
||||
status = discover_smc_peers(gpu);
|
||||
if (status != NV_OK) {
|
||||
// Nobody can have retained the GPU yet, since we still hold the
|
||||
// global lock.
|
||||
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
|
||||
atomic64_set(&gpu->retained_count, 0);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
else if (alloc_parent) {
|
||||
status = discover_nvlink_peers(gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Nobody can have retained the GPU yet, since we still hold the global
|
||||
// lock.
|
||||
// Nobody can have retained the GPU yet, since we still hold the
|
||||
// global lock.
|
||||
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
|
||||
atomic64_set(&gpu->retained_count, 0);
|
||||
goto error;
|
||||
@ -1686,7 +1731,7 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
|
||||
|
||||
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
|
||||
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
for_each_sub_processor_index(sub_processor_index)
|
||||
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
|
||||
|
||||
uvm_kvfree(parent_gpu);
|
||||
@ -1915,32 +1960,25 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
|
||||
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
|
||||
}
|
||||
|
||||
static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
|
||||
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_gpu_id_t gpu_id;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
for_each_gpu_id(gpu_id) {
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
|
||||
|
||||
if (gpu) {
|
||||
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
|
||||
UVM_ASSERT(!gpu->parent->smc.enabled);
|
||||
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
|
||||
return gpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
return gpu_get_by_uuid_locked(gpu_uuid);
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
|
||||
static uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
@ -1998,7 +2036,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
|
||||
if (parent_gpu != NULL) {
|
||||
// If the UUID has been seen before, and if SMC is enabled, then check
|
||||
// if this specific partition has been seen previously. The UUID-based
|
||||
// if this specific partition has been seen previously. The UUID-based
|
||||
// look-up above may have succeeded for a different partition with the
|
||||
// same parent GPU.
|
||||
if (gpu_info->smcEnabled) {
|
||||
@ -2287,7 +2325,7 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
// Create a symlink from UVM GPU UUID (UVM-GPU-...) to the UVM GPU ID gpuB
|
||||
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), uvm_gpu_uuid(remote));
|
||||
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), &remote->uuid);
|
||||
peer_caps->procfs.peer_symlink_file[local_idx] = proc_symlink(symlink_name,
|
||||
local->procfs.dir_peers,
|
||||
gpu_dir_name);
|
||||
@ -2297,6 +2335,24 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_peer_files(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_procfs_is_debug_enabled())
|
||||
return NV_OK;
|
||||
|
||||
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
uvm_gpu_t *gpu1,
|
||||
const UvmGpuP2PCapsParams *p2p_caps_params,
|
||||
@ -2377,16 +2433,41 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
}
|
||||
|
||||
if (!uvm_procfs_is_debug_enabled())
|
||||
return NV_OK;
|
||||
return init_procfs_peer_files(gpu0, gpu1);
|
||||
}
|
||||
|
||||
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU32 sub_processor_index;
|
||||
uvm_gpu_t *other_gpu;
|
||||
NV_STATUS status;
|
||||
|
||||
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
UVM_ASSERT(gpu);
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
UVM_ASSERT(gpu->parent->smc.enabled);
|
||||
|
||||
for_each_sub_processor_index(sub_processor_index) {
|
||||
uvm_gpu_peer_t *peer_caps;
|
||||
|
||||
other_gpu = gpu->parent->gpus[sub_processor_index];
|
||||
if (!other_gpu || other_gpu == gpu)
|
||||
continue;
|
||||
|
||||
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
|
||||
if (peer_caps->ref_count == 1)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(peer_caps->ref_count == 0);
|
||||
|
||||
memset(peer_caps, 0, sizeof(*peer_caps));
|
||||
peer_caps->ref_count = 1;
|
||||
|
||||
status = init_procfs_peer_files(gpu, other_gpu);
|
||||
if (status != NV_OK) {
|
||||
peer_caps->ref_count = 0;
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
@ -2489,9 +2570,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
|
||||
|
||||
UVM_ASSERT(gpu);
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
if (gpu->parent->smc.enabled)
|
||||
return NV_OK;
|
||||
UVM_ASSERT(!gpu->parent->smc.enabled);
|
||||
|
||||
for_each_gpu(other_gpu) {
|
||||
UvmGpuP2PCapsParams p2p_caps_params;
|
||||
@ -2592,10 +2671,6 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
UVM_ASSERT(gpu0);
|
||||
UVM_ASSERT(gpu1);
|
||||
|
||||
// P2P is not supported under SMC partitioning
|
||||
UVM_ASSERT(!gpu0->parent->smc.enabled);
|
||||
UVM_ASSERT(!gpu1->parent->smc.enabled);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
|
||||
@ -2638,9 +2713,9 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
// IDs queried from the peer table above which are about to be removed from
|
||||
// the global table.
|
||||
if (gpu0->parent->access_counters_supported)
|
||||
uvm_gpu_access_counter_buffer_flush(gpu0);
|
||||
uvm_parent_gpu_access_counter_buffer_flush(gpu0->parent);
|
||||
if (gpu1->parent->access_counters_supported)
|
||||
uvm_gpu_access_counter_buffer_flush(gpu1);
|
||||
uvm_parent_gpu_access_counter_buffer_flush(gpu1->parent);
|
||||
|
||||
memset(peer_caps, 0, sizeof(*peer_caps));
|
||||
}
|
||||
@ -2668,12 +2743,17 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu)
|
||||
{
|
||||
size_t peer_index;
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
|
||||
|
||||
// Indirect peers are accessed as sysmem addresses
|
||||
if (peer_caps->is_indirect_peer)
|
||||
return UVM_APERTURE_SYS;
|
||||
|
||||
// MIG instances in the same physical GPU have vidmem addresses
|
||||
if (local_gpu->parent == remote_gpu->parent)
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
|
||||
|
||||
if (uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id))
|
||||
peer_index = 0;
|
||||
else
|
||||
@ -3285,12 +3365,19 @@ NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *fil
|
||||
.user_client = params->hClient,
|
||||
.user_object = params->hSmcPartRef,
|
||||
};
|
||||
NvProcessorUuid gpu_instance_uuid;
|
||||
NV_STATUS status;
|
||||
|
||||
return uvm_va_space_register_gpu(va_space,
|
||||
¶ms->gpu_uuid,
|
||||
&user_rm_va_space,
|
||||
¶ms->numaEnabled,
|
||||
¶ms->numaNodeId);
|
||||
status = uvm_va_space_register_gpu(va_space,
|
||||
¶ms->gpu_uuid,
|
||||
&user_rm_va_space,
|
||||
¶ms->numaEnabled,
|
||||
¶ms->numaNodeId,
|
||||
&gpu_instance_uuid);
|
||||
if (status == NV_OK)
|
||||
uvm_uuid_copy(¶ms->gpu_uuid, &gpu_instance_uuid);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp)
|
||||
@ -3363,10 +3450,10 @@ NV_STATUS uvm_test_set_prefetch_filtering(UVM_TEST_SET_PREFETCH_FILTERING_PARAMS
|
||||
|
||||
switch (params->filtering_mode) {
|
||||
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_ALL:
|
||||
uvm_gpu_disable_prefetch_faults(gpu->parent);
|
||||
uvm_parent_gpu_disable_prefetch_faults(gpu->parent);
|
||||
break;
|
||||
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_NONE:
|
||||
uvm_gpu_enable_prefetch_faults(gpu->parent);
|
||||
uvm_parent_gpu_enable_prefetch_faults(gpu->parent);
|
||||
break;
|
||||
default:
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
|
@ -618,9 +618,10 @@ struct uvm_gpu_struct
|
||||
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
|
||||
// Nice printable name in the format:
|
||||
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
|
||||
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
|
||||
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
|
||||
// Refcount of the gpu, i.e. how many times it has been retained. This is
|
||||
// roughly a count of how many times it has been registered with a VA space,
|
||||
@ -656,6 +657,10 @@ struct uvm_gpu_struct
|
||||
// can allocate through PMM (PMA).
|
||||
NvU64 max_allocatable_address;
|
||||
|
||||
// Max supported vidmem page size may be smaller than the max GMMU page
|
||||
// size, because of the vMMU supported page sizes.
|
||||
NvU64 max_vidmem_page_size;
|
||||
|
||||
struct
|
||||
{
|
||||
// True if the platform supports HW coherence and the GPU's memory
|
||||
@ -844,6 +849,9 @@ struct uvm_gpu_struct
|
||||
|
||||
struct proc_dir_entry *dir_symlink;
|
||||
|
||||
// The GPU instance UUID symlink if SMC is enabled.
|
||||
struct proc_dir_entry *gpu_instance_uuid_symlink;
|
||||
|
||||
struct proc_dir_entry *info_file;
|
||||
|
||||
struct proc_dir_entry *dir_peers;
|
||||
@ -1210,11 +1218,6 @@ static const char *uvm_gpu_name(uvm_gpu_t *gpu)
|
||||
return gpu->name;
|
||||
}
|
||||
|
||||
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
|
||||
{
|
||||
return &gpu->parent->uuid;
|
||||
}
|
||||
|
||||
static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (gpu->parent->smc.enabled)
|
||||
@ -1234,6 +1237,9 @@ struct uvm_gpu_peer_struct
|
||||
// - The global lock is held.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be SMC peers and were both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be NVLINK peers and were both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
@ -1319,17 +1325,17 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
|
||||
// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
|
||||
// a circular dep between global and gpu modules.
|
||||
|
||||
// Get a uvm_gpu_t by UUID. This returns NULL if the GPU is not present. This
|
||||
// is the general purpose call that should be used normally.
|
||||
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
|
||||
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
|
||||
// instead.
|
||||
// Get a uvm_gpu_t by UUID (physical GPU UUID if SMC is not enabled, otherwise
|
||||
// GPU instance UUID).
|
||||
// This returns NULL if the GPU is not present.
|
||||
// This is the general purpose call that should be used normally.
|
||||
//
|
||||
// LOCKING: requires the global lock to be held
|
||||
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
// Get a uvm_parent_gpu_t by UUID. Like uvm_gpu_get_by_uuid(), this function
|
||||
// returns NULL if the GPU has not been registered.
|
||||
// Get a uvm_parent_gpu_t by UUID (physical GPU UUID).
|
||||
// Like uvm_gpu_get_by_uuid(), this function returns NULL if the GPU has not
|
||||
// been registered.
|
||||
//
|
||||
// LOCKING: requires the global lock to be held
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
@ -1340,13 +1346,6 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
// limited cases.
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
|
||||
// the partition hasn't been registered. This call needs to be used instead of
|
||||
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
|
||||
//
|
||||
// LOCKING: requires the global lock to be held
|
||||
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);
|
||||
|
||||
// Retain a gpu by uuid
|
||||
// Returns the retained uvm_gpu_t in gpu_out on success
|
||||
//
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,7 +33,7 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
|
||||
@ -99,7 +99,8 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
|
||||
"Number of remote accesses on a region required to trigger a notification."
|
||||
"Valid values: [1, 65535]");
|
||||
|
||||
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode);
|
||||
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_buffer_flush_mode_t flush_mode);
|
||||
|
||||
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
|
||||
|
||||
@ -126,7 +127,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are disabled by default on all systems except P9.
|
||||
// - MIMC migrations are disabled by default on all non-ATS systems.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
@ -149,7 +150,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
if (UVM_ATS_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
return false;
|
||||
@ -281,7 +282,7 @@ get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm
|
||||
&(access_counters)->current_config.momc;
|
||||
}
|
||||
|
||||
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
@ -340,7 +341,7 @@ static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *confi
|
||||
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
@ -444,12 +445,12 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
return NV_OK;
|
||||
|
||||
fail:
|
||||
uvm_gpu_deinit_access_counters(parent_gpu);
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
@ -475,7 +476,7 @@ void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
batch_context->phys.translations = NULL;
|
||||
}
|
||||
|
||||
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
|
||||
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
if (!parent_gpu->access_counters_supported)
|
||||
return false;
|
||||
@ -518,7 +519,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
|
||||
// taken control of the notify buffer since the GPU was initialized. Then
|
||||
// flush old notifications. This will update the cached_put pointer.
|
||||
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
|
||||
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
|
||||
access_counters->current_config.threshold = config->threshold;
|
||||
|
||||
@ -537,20 +538,20 @@ error:
|
||||
|
||||
// If ownership is yielded as part of reconfiguration, the access counters
|
||||
// handling refcount may not be 0
|
||||
static void access_counters_yield_ownership(uvm_gpu_t *gpu)
|
||||
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
|
||||
// Wait for any pending clear operation befor releasing ownership
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
|
||||
&access_counters->rm_info));
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
@ -579,14 +580,14 @@ static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConf
|
||||
|
||||
// Decrement the refcount of access counter enablement. If this is the last
|
||||
// reference, disable the HW feature.
|
||||
static void gpu_access_counters_disable(uvm_gpu_t *gpu)
|
||||
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
|
||||
|
||||
if (--gpu->parent->isr.access_counters.handling_ref_count == 0)
|
||||
access_counters_yield_ownership(gpu);
|
||||
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
|
||||
access_counters_yield_ownership(parent_gpu);
|
||||
}
|
||||
|
||||
// Invoked during registration of the GPU in the VA space
|
||||
@ -598,7 +599,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
|
||||
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
}
|
||||
else {
|
||||
@ -616,7 +617,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
// modified to protect from concurrent enablement of access counters in
|
||||
// another GPU
|
||||
if (status == NV_OK)
|
||||
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
|
||||
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
}
|
||||
|
||||
// If this is the first reference taken on access counters, dropping the
|
||||
@ -626,22 +627,24 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
|
||||
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_va_space_t *va_space)
|
||||
{
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
|
||||
if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
|
||||
gpu_access_counters_disable(gpu);
|
||||
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
|
||||
parent_gpu->id)) {
|
||||
parent_gpu_access_counters_disable(parent_gpu);
|
||||
|
||||
// If this is VA space reconfigured access counters, clear the
|
||||
// ownership to allow for other processes to invoke the reconfiguration
|
||||
if (gpu->parent->access_counter_buffer_info.reconfiguration_owner == va_space)
|
||||
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
|
||||
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
|
||||
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
}
|
||||
|
||||
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
@ -660,15 +663,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
|
||||
}
|
||||
|
||||
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode)
|
||||
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_gpu_buffer_flush_mode_t flush_mode)
|
||||
{
|
||||
NvU32 get;
|
||||
NvU32 put;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
// Read PUT pointer from the GPU if requested
|
||||
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
|
||||
@ -680,28 +684,28 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
|
||||
|
||||
while (get != put) {
|
||||
// Wait until valid bit is set
|
||||
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin);
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
|
||||
|
||||
gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get);
|
||||
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
|
||||
++get;
|
||||
if (get == access_counters->max_notifications)
|
||||
get = 0;
|
||||
}
|
||||
|
||||
write_get(gpu->parent, get);
|
||||
write_get(parent_gpu, get);
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
// Disables access counter interrupts and notification servicing
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count > 0)
|
||||
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
|
||||
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
}
|
||||
|
||||
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
|
||||
@ -1027,7 +1031,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
if (!iter.migratable)
|
||||
continue;
|
||||
|
||||
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, address, processor);
|
||||
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
|
||||
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
|
||||
// If the page is throttling, ignore the access counter
|
||||
// notification
|
||||
@ -1212,7 +1216,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context->mm = mm;
|
||||
|
||||
uvm_va_block_context_init(service_context->block_context, mm);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
uvm_hmm_migrate_begin_wait(va_block);
|
||||
@ -1221,7 +1226,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
|
||||
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
|
||||
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
&va_block_retry,
|
||||
service_va_block_locked(processor,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
@ -1506,8 +1512,6 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
|
||||
service_context->num_retries = 0;
|
||||
|
||||
uvm_va_block_context_init(service_context->block_context, mm);
|
||||
|
||||
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
&va_block_retry,
|
||||
service_va_block_locked(processor,
|
||||
@ -1519,6 +1523,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
|
||||
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
{
|
||||
@ -1546,7 +1551,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, addr);
|
||||
|
||||
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);
|
||||
|
||||
// resident_id might be invalid or might already be the same as the GPU
|
||||
// which received the notification if the memory was already migrated before
|
||||
@ -1602,6 +1607,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
@ -1610,16 +1616,24 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
|
||||
uvm_page_mask_zero(accessed_pages);
|
||||
|
||||
uvm_va_block_context_init(service_context->block_context, mm);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
|
||||
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
else
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
|
||||
expand_notification_block(gpu_va_space,
|
||||
va_block,
|
||||
batch_context->block_service_context.block_context,
|
||||
accessed_pages,
|
||||
current_entry);
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
@ -1698,6 +1712,9 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
|
||||
// location is set
|
||||
// If no pages were actually migrated, don't clear the access counters.
|
||||
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
@ -1985,7 +2002,7 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
|
||||
if (!gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
params->enabled = uvm_gpu_access_counters_required(gpu->parent);
|
||||
params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);
|
||||
|
||||
uvm_gpu_release(gpu);
|
||||
|
||||
@ -2050,11 +2067,11 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
|
||||
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = gpu_access_counters_enable(gpu, &config);
|
||||
|
||||
if (status == NV_OK)
|
||||
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
|
||||
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
else
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
@ -2066,7 +2083,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
|
||||
// enabled in at least gpu. This inconsistent state is not visible to other
|
||||
// threads or VA spaces because of the ISR lock, and it is immediately
|
||||
// rectified by retaking ownership.
|
||||
access_counters_yield_ownership(gpu);
|
||||
access_counters_yield_ownership(gpu->parent);
|
||||
status = access_counters_take_ownership(gpu, &config);
|
||||
|
||||
// Retaking ownership failed, so RM owns the interrupt.
|
||||
@ -2080,8 +2097,8 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
|
||||
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
|
||||
}
|
||||
|
||||
uvm_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id);
|
||||
gpu_access_counters_disable(gpu);
|
||||
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
parent_gpu_access_counters_disable(gpu->parent);
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
|
||||
@ -2167,42 +2184,42 @@ exit_release_gpu:
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
|
||||
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
|
||||
{
|
||||
bool change_intr_state = false;
|
||||
|
||||
if (!gpu->parent->access_counters_supported)
|
||||
if (!parent_gpu->access_counters_supported)
|
||||
return;
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
|
||||
if (do_ignore) {
|
||||
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
|
||||
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
|
||||
change_intr_state = true;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(gpu->parent->access_counter_buffer_info.notifications_ignored_count >= 1);
|
||||
if (--gpu->parent->access_counter_buffer_info.notifications_ignored_count == 0)
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
|
||||
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
|
||||
change_intr_state = true;
|
||||
}
|
||||
|
||||
if (change_intr_state) {
|
||||
// We need to avoid an interrupt storm while ignoring notifications. We
|
||||
// just disable the interrupt.
|
||||
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (do_ignore)
|
||||
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
else
|
||||
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (!do_ignore)
|
||||
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
|
||||
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
|
||||
@ -2216,7 +2233,7 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_gpu_access_counters_set_ignore(gpu, params->ignore);
|
||||
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
|
||||
else
|
||||
status = NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017 NVIDIA Corporation
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -27,13 +27,13 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_test_ioctl.h"
|
||||
|
||||
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
|
||||
|
||||
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Ignore or unignore access counters notifications. Ignoring means that the
|
||||
// bottom half is a no-op which just leaves notifications in the HW buffer
|
||||
@ -46,7 +46,7 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
|
||||
//
|
||||
// When uningoring, the interrupt conditions will be re-evaluated to trigger
|
||||
// processing of buffered notifications, if any exist.
|
||||
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
|
||||
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
|
||||
|
||||
// Return whether the VA space has access counter migrations enabled. The
|
||||
// caller must ensure that the VA space cannot go away.
|
||||
@ -63,7 +63,7 @@ void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
|
||||
|
||||
// Check whether access counters should be enabled when the given GPU is
|
||||
// registered on any VA space.
|
||||
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Functions used to enable/disable access counters on a GPU in the given VA
|
||||
// space.
|
||||
@ -72,12 +72,12 @@ bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
|
||||
// counters are currently enabled. The hardware notifications and interrupts on
|
||||
// the GPU are enabled the first time any VA space invokes
|
||||
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
|
||||
// uvm_gpu_access_counters_disable
|
||||
// uvm_parent_gpu_access_counters_disable().
|
||||
//
|
||||
// Locking: the VA space lock must not be held by the caller since these
|
||||
// functions may take the access counters ISR lock.
|
||||
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
|
||||
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
|
||||
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
|
||||
|
||||
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -100,7 +100,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
|
||||
return 0;
|
||||
|
||||
if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
|
||||
if (!uvm_parent_gpu_replayable_faults_pending(parent_gpu)) {
|
||||
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
|
||||
return 0;
|
||||
}
|
||||
@ -137,7 +137,7 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
|
||||
// interrupts will be triggered by the gpu and faults may stay
|
||||
// unserviced. Therefore, if there is a fault in the queue, we schedule
|
||||
// a bottom half unconditionally.
|
||||
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
|
||||
if (!uvm_parent_gpu_non_replayable_faults_pending(parent_gpu))
|
||||
return 0;
|
||||
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
@ -167,7 +167,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
|
||||
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
|
||||
return 0;
|
||||
|
||||
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
|
||||
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
|
||||
up(&parent_gpu->isr.access_counters.service_lock.sem);
|
||||
return 0;
|
||||
}
|
||||
@ -295,7 +295,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_va_block_context_t *block_context;
|
||||
|
||||
if (parent_gpu->replayable_faults_supported) {
|
||||
status = uvm_gpu_fault_buffer_init(parent_gpu);
|
||||
status = uvm_parent_gpu_fault_buffer_init(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -361,7 +361,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
status = uvm_gpu_init_access_counters(parent_gpu);
|
||||
status = uvm_parent_gpu_init_access_counters(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -423,7 +423,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
// bottom half never take the global lock, since we're holding it here.
|
||||
//
|
||||
// Note that it's safe to call nv_kthread_q_stop() even if
|
||||
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
|
||||
// nv_kthread_q_init() failed in uvm_parent_gpu_init_isr().
|
||||
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
|
||||
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
|
||||
}
|
||||
@ -438,8 +438,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
// replayable_faults.disable_intr_ref_count since they must retain the
|
||||
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
|
||||
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
|
||||
// uvm_gpu_replayable_faults_disable_intr above could only have raced
|
||||
// with bottom halves.
|
||||
// uvm_parent_gpu_replayable_faults_disable_intr above could only have
|
||||
// raced with bottom halves.
|
||||
//
|
||||
// If we cleared replayable_faults.handling before the bottom half got
|
||||
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
|
||||
@ -455,13 +455,13 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
|
||||
|
||||
uvm_gpu_fault_buffer_deinit(parent_gpu);
|
||||
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
// It is safe to deinitialize access counters even if they have not been
|
||||
// successfully initialized.
|
||||
uvm_gpu_deinit_access_counters(parent_gpu);
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
block_context =
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -116,8 +116,8 @@
|
||||
|
||||
|
||||
// There is no error handling in this function. The caller is in charge of
|
||||
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
|
||||
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
|
||||
@ -145,7 +145,7 @@ NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *pare
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
|
||||
@ -163,7 +163,7 @@ void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_
|
||||
non_replayable_faults->fault_cache = NULL;
|
||||
}
|
||||
|
||||
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvBool has_pending_faults;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017 NVIDIA Corporation
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -26,12 +26,12 @@
|
||||
#include <nvstatus.h>
|
||||
#include "uvm_forward_decl.h"
|
||||
|
||||
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
|
||||
|
||||
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -44,6 +44,24 @@
|
||||
// provides some background for understanding replayable faults, non-replayable
|
||||
// faults, and how UVM services each fault type.
|
||||
|
||||
// The HW fault buffer flush mode instructs RM on how to flush the hardware
|
||||
// replayable fault buffer; it is only used in Confidential Computing.
|
||||
//
|
||||
// Unless HW_FAULT_BUFFER_FLUSH_MODE_MOVE is functionally required (because UVM
|
||||
// needs to inspect the faults currently present in the HW fault buffer) it is
|
||||
// recommended to use HW_FAULT_BUFFER_FLUSH_MODE_DISCARD for performance
|
||||
// reasons.
|
||||
typedef enum
|
||||
{
|
||||
// Flush the HW fault buffer, discarding all the resulting faults. UVM never
|
||||
// gets to see these faults.
|
||||
HW_FAULT_BUFFER_FLUSH_MODE_DISCARD,
|
||||
|
||||
// Flush the HW fault buffer, and move all the resulting faults to the SW
|
||||
// fault ("shadow") buffer.
|
||||
HW_FAULT_BUFFER_FLUSH_MODE_MOVE,
|
||||
} hw_fault_buffer_flush_mode_t;
|
||||
|
||||
#define UVM_PERF_REENABLE_PREFETCH_FAULTS_LAPSE_MSEC_DEFAULT 1000
|
||||
|
||||
// Lapse of time in milliseconds after which prefetch faults can be re-enabled.
|
||||
@ -226,7 +244,7 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
batch_context->utlbs = NULL;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -253,7 +271,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
goto fail;
|
||||
|
||||
if (parent_gpu->non_replayable_faults_supported) {
|
||||
status = uvm_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
|
||||
status = uvm_parent_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
}
|
||||
@ -261,28 +279,28 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
return NV_OK;
|
||||
|
||||
fail:
|
||||
uvm_gpu_fault_buffer_deinit(parent_gpu);
|
||||
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Reinitialize state relevant to replayable fault handling after returning
|
||||
// from a power management cycle.
|
||||
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
fault_buffer_reinit_replayable_faults(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
if (parent_gpu->non_replayable_faults_supported)
|
||||
uvm_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
|
||||
uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
|
||||
|
||||
fault_buffer_deinit_replayable_faults(parent_gpu);
|
||||
|
||||
@ -297,7 +315,7 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
}
|
||||
|
||||
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
|
||||
@ -533,25 +551,26 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);
|
||||
}
|
||||
|
||||
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
|
||||
// In Confidential Computing GSP-RM owns the HW replayable fault buffer.
|
||||
// Flushing the fault buffer implies flushing both the HW buffer (using a RM
|
||||
// API), and the SW buffer accessible by UVM ("shadow" buffer).
|
||||
//
|
||||
// The HW buffer needs to be flushed first. This is because, once that flush
|
||||
// completes, any faults that were present in the HW buffer have been moved to
|
||||
// the shadow buffer, or have been discarded by RM.
|
||||
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_fault_buffer_flush_mode_t flush_mode)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS status;
|
||||
NvBool is_flush_mode_move;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
UVM_ASSERT((flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE) || (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_DISCARD));
|
||||
|
||||
// When Confidential Computing is enabled, GSP-RM owns the HW replayable
|
||||
// fault buffer. Flushing the fault buffer implies flushing both the HW
|
||||
// buffer (using a RM API), and the SW buffer accessible by UVM ("shadow"
|
||||
// buffer).
|
||||
//
|
||||
// The HW buffer needs to be flushed first. This is because, once that
|
||||
// flush completes, any faults that were present in the HW buffer when
|
||||
// fault_buffer_flush_locked is called, are now either flushed from the HW
|
||||
// buffer, or are present in the shadow buffer and are about to be discarded
|
||||
// too.
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
// Flush the HW replayable buffer owned by GSP-RM.
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
|
||||
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@ -595,10 +614,9 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,

    // Read PUT pointer from the GPU if requested
    if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT || flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT) {
        status = hw_fault_buffer_flush_locked(parent_gpu);
        status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
        if (status != NV_OK)
            return status;

        replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
    }

@ -1435,7 +1453,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
                                          uvm_fault_access_type_to_prot(service_access_type)))
            continue;

        thrashing_hint = uvm_perf_thrashing_get_hint(va_block, current_entry->fault_address, gpu->id);
        thrashing_hint = uvm_perf_thrashing_get_hint(va_block,
                                                     block_context->block_context,
                                                     current_entry->fault_address,
                                                     gpu->id);
        if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
            // Throttling is implemented by sleeping in the fault handler on
            // the CPU and by continuing to process faults on other pages on
@ -1981,7 +2002,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
    // in the HW buffer. When GSP owns the HW buffer, we also have to wait for
    // GSP to copy all available faults from the HW buffer into the shadow
    // buffer.
    status = hw_fault_buffer_flush_locked(gpu->parent);
    status = hw_fault_buffer_flush_locked(gpu->parent, HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
    if (status != NV_OK)
        goto done;

@ -2738,14 +2759,14 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu, uvm_fau
         (uvm_enable_builtin_tests &&
          parent_gpu->rm_info.isSimulated &&
          batch_context->num_invalid_prefetch_faults > 5))) {
        uvm_gpu_disable_prefetch_faults(parent_gpu);
        uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
    }
    else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
        NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;

        // Reenable prefetch faults after some time
        if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
            uvm_gpu_enable_prefetch_faults(parent_gpu);
            uvm_parent_gpu_enable_prefetch_faults(parent_gpu);
    }
}

@ -2872,7 +2893,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
        UVM_DBG_PRINT("Error servicing replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
    }

void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2883,7 +2904,7 @@ void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
    }
}

void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
    UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
    UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2940,7 +2961,7 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA

    do {
        uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
        pending = uvm_gpu_replayable_faults_pending(gpu->parent);
        pending = uvm_parent_gpu_replayable_faults_pending(gpu->parent);
        uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);

        if (!pending)

@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015 NVIDIA Corporation
    Copyright (c) 2015-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -52,12 +52,12 @@ typedef enum

const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);

NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);

void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);

bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);

// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
@ -68,8 +68,8 @@ bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);

// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half

@ -1306,7 +1306,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_policy_node_t *node;
|
||||
uvm_va_block_region_t region;
|
||||
uvm_processor_mask_t map_processors;
|
||||
uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction;
|
||||
uvm_processor_id_t id;
|
||||
NV_STATUS tracker_status;
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -1333,9 +1333,9 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
|
||||
// Exclude the processors that have been already mapped due to
|
||||
// AccessedBy.
|
||||
uvm_processor_mask_andnot(&map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
|
||||
uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
|
||||
|
||||
for_each_gpu_id_in_mask(id, &map_processors) {
|
||||
for_each_gpu_id_in_mask(id, map_processors) {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
|
||||
uvm_va_block_gpu_state_t *gpu_state;
|
||||
|
||||
@ -1866,7 +1866,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
|
||||
unsigned long *dst_pfns,
|
||||
uvm_page_mask_t *same_devmem_page_mask)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index);
|
||||
uvm_va_block_region_t chunk_region;
|
||||
struct page *dst_page;
|
||||
|
||||
@ -2708,7 +2708,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
// Since there is a CPU resident page, there shouldn't be one
|
||||
// anywhere else. TODO: Bug 3660922: Need to handle read
|
||||
// duplication at some point.
|
||||
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
|
||||
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
|
||||
service_context->block_context,
|
||||
page_index));
|
||||
|
||||
// migrate_vma_setup() was able to isolate and lock the page;
|
||||
// therefore, it is CPU resident and not mapped.
|
||||
@ -2725,8 +2727,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
// used for GPU to GPU copies. It can't be an evicted page because
|
||||
// migrate_vma_setup() would have found a source page.
|
||||
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
|
||||
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
|
||||
service_context->block_context,
|
||||
page_index));
|
||||
hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2019 NVidia Corporation
|
||||
Copyright (c) 2013-2023 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -320,7 +320,7 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // IN
|
||||
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
|
||||
NvU32 numGpus; // IN
|
||||
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
|
||||
NV_STATUS rmStatus; // OUT
|
||||
@ -344,9 +344,9 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // OUT
|
||||
NvU32 validCount; // OUT
|
||||
NV_STATUS rmStatus; // OUT
|
||||
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
|
||||
NvU32 validCount; // OUT
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_GET_GPU_UUID_TABLE_PARAMS;
|
||||
|
||||
#if defined(WIN32) || defined(WIN64)
|
||||
@ -494,7 +494,7 @@ typedef struct
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 offset NV_ALIGN_BYTES(8); // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
|
||||
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
|
||||
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
|
||||
NvS32 rmCtrlFd; // IN
|
||||
NvU32 hClient; // IN
|
||||
@ -552,7 +552,7 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpu_uuid; // IN
|
||||
NvProcessorUuid gpu_uuid; // IN/OUT
|
||||
NvBool numaEnabled; // OUT
|
||||
NvS32 numaNodeId; // OUT
|
||||
NvS32 rmCtrlFd; // IN
|
||||
@ -835,7 +835,14 @@ typedef struct

//
// Initialize any tracker object such as a queue or counter
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters, UvmToolsCreateProcessorCounters
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the order of structure elements has the version as the last field.
// This is used to tell whether the kernel supports V2 events or not because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER              UVM_IOCTL_BASE(56)
typedef struct
@ -847,6 +854,8 @@ typedef struct
    NvU32 allProcessors;                          // IN
    NvU32 uvmFd;                                  // IN
    NV_STATUS rmStatus;                           // OUT
    NvU32 requestedVersion;                       // IN
    NvU32 grantedVersion;                         // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
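Given the comment above, a user-space tools client can probe for V2 support when it creates the event tracker. The fragment below is a hedged sketch only: the ioctl call pattern, uvm_fd, and the elided IN fields are assumptions, while the version constants and the requestedVersion/grantedVersion fields come from this header.

// Hypothetical user-space fragment: ask for V2 events, fall back to V1.
UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS params = {0};
int use_v2 = 0;

params.requestedVersion = UvmToolsEventQueueVersion_V2;   // IN
/* ... fill in the remaining IN fields shown in the struct above ... */

if (ioctl(uvm_fd, UVM_TOOLS_INIT_EVENT_TRACKER, &params) == 0 && params.rmStatus == NV_OK) {
    // A V1 kernel neither reads nor writes the trailing fields, so
    // grantedVersion keeps its zero-initialized value and the client must
    // decode UvmEventEntry_V1 entries instead.
    use_v2 = (params.grantedVersion == UvmToolsEventQueueVersion_V2);
}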

//
@ -927,6 +936,12 @@ typedef struct

//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present in order to dynamically allocate the correct size array since the
// total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE        UVM_IOCTL_BASE(64)
typedef struct
@ -934,6 +949,7 @@ typedef struct
    NvU64 tablePtr NV_ALIGN_BYTES(8);             // IN
    NvU32 count;                                  // IN/OUT
    NV_STATUS rmStatus;                           // OUT
    NvU32 version;                                // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
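The query semantics described above lend themselves to a two-step call: probe the processor count first, then allocate and fetch the table. This is a hedged user-space sketch; uvm_fd is a placeholder for an already-open UVM file descriptor, and the second call's tablePtr/count combination is assumed to be the normal path for dynamically sized tables.

// Hypothetical sketch of the two-call pattern.
UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS p = {0};

// Step 1: tablePtr == 0 and count == 0 only queries how many processors exist.
ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &p);

// Step 2: allocate 'count' entries and fetch the table (assumed behavior when
// tablePtr is non-zero and count carries the allocated size).
NvProcessorUuid *table = calloc(p.count, sizeof(*table));
p.tablePtr = (NvU64)(uintptr_t)table;
ioctl(uvm_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &p);
// p.version is an OUT field added for the V2 interface.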

@ -979,7 +995,7 @@ typedef struct
{
    NvU64 base NV_ALIGN_BYTES(8);                               // IN
    NvU64 length NV_ALIGN_BYTES(8);                             // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS];     // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2];  // IN
    NvU64 gpuAttributesCount NV_ALIGN_BYTES(8);                 // IN
    NV_STATUS rmStatus;                                         // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#define UVM_IS_CONFIG_HMM() 0
|
||||
#endif
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
// Various issues prevent us from using mmu_notifiers in older kernels. These
|
||||
// include:
|
||||
// - ->release being called under RCU instead of SRCU: fixed by commit
|
||||
|
@ -633,8 +633,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
uvm_gpu_t *mapping_gpu,
|
||||
const UvmGpuMemoryInfo *mem_info)
|
||||
{
|
||||
uvm_gpu_t *owning_gpu = NULL;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_gpu_t *owning_gpu;
|
||||
|
||||
if (mem_info->egm)
|
||||
UVM_ASSERT(mem_info->sysmem);
|
||||
@ -653,16 +652,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
// registered.
|
||||
// This also checks for if EGM owning GPU is registered.
|
||||
|
||||
// TODO: Bug 4351121: RM will return the GI UUID, but
|
||||
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
|
||||
// Match on GI UUID until the UVM user level API has been updated to use
|
||||
// the GI UUID.
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
|
||||
owning_gpu = gpu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
if (!owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
@ -954,6 +944,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Check for the maximum page size for the mapping of vidmem allocations,
|
||||
// the vMMU segment size may limit the range of page sizes.
|
||||
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
|
||||
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
|
||||
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
|
||||
|
||||
mem_info.pageSize = mapping_page_size;
|
||||
|
||||
status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
|
||||
@ -989,7 +985,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
if (uvm_api_range_invalid_4k(params->base, params->length))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
@ -86,7 +86,7 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
|
||||
|
||||
// Only map those pages that are not already mapped on destination
|
||||
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
|
||||
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
|
||||
prot = uvm_va_block_page_compute_highest_permission(va_block, va_block_context, dest_id, page_index);
|
||||
if (prot == UVM_PROT_NONE)
|
||||
continue;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -149,6 +149,26 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// The aperture may filter the biggest page size:
|
||||
// - UVM_APERTURE_VID biggest page size on vidmem mappings
|
||||
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
|
||||
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
|
||||
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
|
||||
{
|
||||
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
|
||||
|
||||
// There may be scenarios where the GMMU must use a subset of the supported
|
||||
// page sizes, e.g., to comply with the vMMU supported page sizes due to
|
||||
// segmentation sizes.
|
||||
if (aperture == UVM_APERTURE_VID) {
|
||||
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
|
||||
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
|
||||
}
|
||||
else {
|
||||
return 1 << __fls(tree->hal->page_sizes());
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
@ -856,7 +876,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
|
||||
if (!page_tree_ats_init_required(tree))
|
||||
return NV_OK;
|
||||
|
||||
page_size = uvm_mmu_biggest_page_size(tree);
|
||||
page_size = mmu_biggest_page_size(tree, UVM_APERTURE_VID);
|
||||
|
||||
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
|
||||
|
||||
@ -1090,6 +1110,8 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
|
||||
tree->gpu_va_space = gpu_va_space;
|
||||
tree->big_page_size = big_page_size;
|
||||
|
||||
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
|
||||
|
||||
page_tree_set_location(tree, location);
|
||||
|
||||
uvm_tracker_init(&tree->tracker);
|
||||
@ -2301,7 +2323,7 @@ NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
|
||||
|
||||
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
|
||||
page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
|
||||
size = UVM_ALIGN_UP(gpu->mem_info.max_allocatable_address + 1, page_size);
|
||||
|
||||
UVM_ASSERT(page_size);
|
||||
@ -2338,9 +2360,9 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
|
||||
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
|
||||
return NV_OK;
|
||||
|
||||
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
|
||||
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
|
||||
aperture = uvm_gpu_peer_aperture(gpu, peer);
|
||||
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
|
||||
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
|
||||
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
|
||||
phys_offset = 0ULL;
|
||||
|
||||
@ -2783,7 +2805,7 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
|
||||
// sysmem mappings with 128K entries.
|
||||
UVM_ASSERT(is_power_of_2(mapping_size));
|
||||
UVM_ASSERT(mapping_size >= UVM_SIZE_1GB);
|
||||
UVM_ASSERT(mapping_size >= uvm_mmu_biggest_page_size(&gpu->address_space_tree));
|
||||
UVM_ASSERT(mapping_size >= mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS));
|
||||
UVM_ASSERT(mapping_size <= flat_sysmem_va_size);
|
||||
|
||||
flat_sysmem_va_size = UVM_ALIGN_UP(flat_sysmem_va_size, mapping_size);
|
||||
@ -2828,7 +2850,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
|
||||
if (sysmem_mapping->range_vec == NULL) {
|
||||
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
|
||||
NvU64 phys_offset = curr_pa;
|
||||
NvU32 page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
|
||||
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
|
||||
uvm_pmm_alloc_flags_t pmm_flags;
|
||||
|
||||
// No eviction is requested when allocating the page tree storage,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -612,6 +612,9 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
|
||||
}
|
||||
|
||||
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
|
||||
// the largest page size supported in a given system, which considers the GMMU
|
||||
// and vMMU page sizes and segment sizes.
|
||||
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
{
|
||||
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
|
||||
@ -642,11 +645,6 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
|
||||
return page_size;
|
||||
}
|
||||
|
||||
static NvU32 uvm_mmu_biggest_page_size(uvm_page_tree_t *tree)
|
||||
{
|
||||
return 1 << __fls(tree->hal->page_sizes());
|
||||
}
|
||||
|
||||
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
|
||||
{
|
||||
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));
|
||||
|
@ -1442,6 +1442,7 @@ static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_locatio
|
||||
|
||||
static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
page_thrashing_info_t *page_thrashing,
|
||||
uvm_processor_id_t requester)
|
||||
@ -1460,7 +1461,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
|
||||
hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
|
||||
|
||||
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
|
||||
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
// HMM pages always start out resident on the CPU but may not be
|
||||
// recorded in the va_block state because hmm_range_fault() or
|
||||
@ -1601,6 +1602,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
// that case we keep the page pinned while applying the same algorithm as in
|
||||
// Phase1.
|
||||
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address,
|
||||
uvm_processor_id_t requester)
|
||||
{
|
||||
@ -1713,6 +1715,7 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
|
||||
else {
|
||||
hint = get_hint_for_migration_thrashing(va_space_thrashing,
|
||||
va_block,
|
||||
va_block_context,
|
||||
page_index,
|
||||
page_thrashing,
|
||||
requester);
|
||||
|
@ -74,7 +74,9 @@ typedef struct
|
||||
} uvm_perf_thrashing_hint_t;
|
||||
|
||||
// Obtain a hint to prevent thrashing on the page with given address
|
||||
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block, NvU64 address,
|
||||
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address,
|
||||
uvm_processor_id_t requester);
|
||||
|
||||
// Obtain a pointer to a mask with the processors that are thrashing on the
|
||||
|
@ -1408,8 +1408,6 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
|
||||
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(accessing_gpu, gpu);
|
||||
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
|
||||
|
||||
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
|
||||
|
||||
if (peer_caps->is_indirect_peer ||
|
||||
(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)) {
|
||||
// Indirect peers are accessed as sysmem addresses, so they don't need
|
||||
|
@ -1082,6 +1082,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
uvm_va_block_context_t *va_block_context = NULL;
|
||||
NvU32 num_blocks;
|
||||
NvU32 index = 0;
|
||||
uvm_gpu_phys_address_t phys_addr = {0};
|
||||
@ -1099,9 +1100,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
}
|
||||
TEST_CHECK_RET(va_block);
|
||||
|
||||
va_block_context = uvm_va_block_context_alloc(NULL);
|
||||
TEST_CHECK_RET(va_block_context);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, 0, gpu->id), gpu->id);
|
||||
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
|
||||
if (is_resident) {
|
||||
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
|
||||
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
|
||||
@ -1109,6 +1113,8 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
uvm_va_block_context_free(va_block_context);
|
||||
|
||||
TEST_CHECK_RET(is_resident);
|
||||
|
||||
// Perform the lookup for the whole root chunk
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include "uvm_processors.h"
|
||||
|
||||
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
|
||||
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
|
||||
const uvm_processor_mask_t g_uvm_processor_mask_empty = { };
|
||||
|
||||
NV_STATUS uvm_processor_mask_cache_init(void)
|
||||
{
|
||||
|
@ -522,6 +522,9 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
|
||||
uvm_processor_id_t, \
|
||||
uvm_id_from_value)
|
||||
|
||||
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
|
||||
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
|
||||
|
||||
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
|
||||
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
|
||||
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
|
||||
@ -567,6 +570,10 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
|
||||
(uvm_id_value(i) < uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
|
||||
i = uvm_gpu_id_next(i))
|
||||
|
||||
// Helper to iterate over all sub processor indexes.
|
||||
#define for_each_sub_processor_index(i) \
|
||||
for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
|
||||
|
||||
// Helper to iterate over all valid processor ids.
|
||||
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))
|
||||
|
||||
|
@ -41,15 +41,11 @@
|
||||
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
NvU64 retained_count = 0;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
parent_gpu = uvm_parent_gpu_get_by_uuid(¶ms->gpu_uuid);
|
||||
if (parent_gpu)
|
||||
gpu = uvm_gpu_get_by_parent_and_swizz_id(parent_gpu, params->swizz_id);
|
||||
|
||||
gpu = uvm_gpu_get_by_uuid(¶ms->gpu_uuid);
|
||||
if (gpu != NULL)
|
||||
retained_count = uvm_gpu_retained_count(gpu);
|
||||
|
||||
|
@ -40,7 +40,6 @@ typedef struct
|
||||
{
|
||||
// In params
|
||||
NvProcessorUuid gpu_uuid;
|
||||
NvU32 swizz_id;
|
||||
// Out params
|
||||
NvU64 ref_count NV_ALIGN_BYTES(8);
|
||||
NV_STATUS rmStatus;
|
||||
@ -192,7 +191,7 @@ typedef struct
|
||||
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
|
||||
NvProcessorUuid preferred_location; // Out
|
||||
NvS32 preferred_cpu_nid; // Out
|
||||
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
|
||||
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU32 accessed_by_count; // Out
|
||||
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
|
||||
union
|
||||
@ -505,7 +504,12 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
// In params
|
||||
UvmEventEntry entry; // contains only NvUxx types
|
||||
union
|
||||
{
|
||||
UvmEventEntry_V1 entry_v1; // contains only NvUxx types
|
||||
UvmEventEntry_V2 entry_v2; // contains only NvUxx types
|
||||
};
|
||||
NvU32 version;
|
||||
NvU32 count;
|
||||
|
||||
// Out param
|
||||
@ -620,7 +624,7 @@ typedef struct
|
||||
|
||||
// Array of processors which have a resident copy of the page containing
|
||||
// lookup_address.
|
||||
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
|
||||
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU32 resident_on_count; // Out
|
||||
|
||||
// If the memory is resident on the CPU, the NUMA node on which the page
|
||||
@ -631,24 +635,24 @@ typedef struct
|
||||
// system-page-sized portion of this allocation which contains
|
||||
// lookup_address is guaranteed to be resident on the corresponding
|
||||
// processor.
|
||||
NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
|
||||
NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
|
||||
// The physical address of the physical allocation backing lookup_address.
|
||||
NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
|
||||
NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
|
||||
|
||||
// Array of processors which have a virtual mapping covering lookup_address.
|
||||
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
|
||||
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
|
||||
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
|
||||
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
|
||||
NvU32 mapped_on_count; // Out
|
||||
|
||||
// The size of the virtual mapping covering lookup_address on each
|
||||
// mapped_on processor.
|
||||
NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
|
||||
NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
|
||||
// Array of processors which have physical memory populated that would back
|
||||
// lookup_address if it was resident.
|
||||
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
|
||||
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
|
||||
NvU32 populated_on_count; // Out
|
||||
|
||||
NV_STATUS rmStatus; // Out
|
||||
|
File diff suppressed because it is too large
@ -52,8 +52,19 @@ typedef enum

typedef unsigned long long UvmStream;

#define UVM_MAX_GPUS          NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS    (UVM_MAX_GPUS + 1)
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
#define UVM_MAX_GPUS_V1       NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2       (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)

// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS          UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS    UVM_MAX_PROCESSORS_V1

#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
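To make the arithmetic above concrete, here is a worked example; 32 and 8 are only illustrative stand-ins for NV_MAX_DEVICES and NV_MAX_SUBDEVICES, which are defined in other headers.

// Worked example (assumed values: NV_MAX_DEVICES == 32, NV_MAX_SUBDEVICES == 8)
//   UVM_MAX_GPUS_V2         = 32 * 8          = 256
//   UVM_MAX_PROCESSORS_V2   = 256 + 1         = 257
//   UVM_PROCESSOR_MASK_SIZE = (257 + 63) / 64 = 5   NvU64 words (320 bits)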

#define UVM_INIT_FLAGS_DISABLE_HMM                      ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE       ((NvU64)0x2)
@ -152,6 +163,8 @@ typedef enum {
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
|
||||
// or the GPU instance UUID of the partition.
|
||||
NvProcessorUuid gpuUuid;
|
||||
NvU32 gpuMappingType; // UvmGpuMappingType
|
||||
NvU32 gpuCachingType; // UvmGpuCachingType
|
||||
@ -410,7 +423,29 @@ typedef struct
|
||||
NvU32 pid; // process id causing the fault
|
||||
NvU32 threadId; // thread id causing the fault
|
||||
NvU64 pc; // address of the instruction causing the fault
|
||||
} UvmEventCpuFaultInfo;
|
||||
} UvmEventCpuFaultInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be 1st argument of this structure. Setting eventType to
|
||||
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets.
|
||||
//
|
||||
NvU16 padding16Bits;
|
||||
NvS32 nid; // NUMA node ID of faulting CPU
|
||||
NvU64 address; // faulting address
|
||||
NvU64 timeStamp; // cpu time when the fault occurred
|
||||
NvU32 pid; // process id causing the fault
|
||||
NvU32 threadId; // thread id causing the fault
|
||||
NvU64 pc; // address of the instruction causing the fault
|
||||
} UvmEventCpuFaultInfo_V2;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@ -567,7 +602,49 @@ typedef struct
|
||||
// on the gpu
|
||||
NvU64 endTimeStampGpu; // time stamp when the migration finished
|
||||
// on the gpu
|
||||
} UvmEventMigrationInfo;
|
||||
} UvmEventMigrationInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure. Setting eventType
|
||||
// to UvmEventTypeMigration helps to identify event data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// Cause that triggered the migration
|
||||
//
|
||||
NvU8 migrationCause;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU16 padding16Bits;
|
||||
//
|
||||
// Indices are used for the source and destination of migration instead of
|
||||
// using gpu uuid/cpu id. This reduces the size of each event. The index to
|
||||
// gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
|
||||
// Currently we do not distinguish between CPUs so they all use index 0.
|
||||
//
|
||||
NvU16 srcIndex; // source CPU/GPU index
|
||||
NvU16 dstIndex; // destination CPU/GPU index
|
||||
NvS32 srcNid; // source CPU NUMA node ID
|
||||
NvS32 dstNid; // destination CPU NUMA node ID
|
||||
NvU64 address; // base virtual addr used for migration
|
||||
NvU64 migratedBytes; // number of bytes migrated
|
||||
NvU64 beginTimeStamp; // cpu time stamp when the memory transfer
|
||||
// was queued on the gpu
|
||||
NvU64 endTimeStamp; // cpu time stamp when the memory transfer
|
||||
// finalization was communicated to the cpu
|
||||
// For asynchronous operations this field
|
||||
// will be zero
|
||||
NvU64 rangeGroupId; // range group tied with this migration
|
||||
NvU64 beginTimeStampGpu; // time stamp when the migration started
|
||||
// on the gpu
|
||||
NvU64 endTimeStampGpu; // time stamp when the migration finished
|
||||
// on the gpu
|
||||
} UvmEventMigrationInfo_V2;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@ -633,7 +710,64 @@ typedef struct
|
||||
//
|
||||
NvU8 padding8Bits;
|
||||
NvU16 padding16Bits;
|
||||
} UvmEventGpuFaultInfo;
|
||||
} UvmEventGpuFaultInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeGpuFault helps to identify event data in
|
||||
// a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
|
||||
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8Bits_1;
|
||||
union
|
||||
{
|
||||
NvU16 gpcId; // If this is a replayable fault, this field contains
|
||||
// the physical GPC index where the fault was
|
||||
// triggered
|
||||
|
||||
NvU16 channelId; // If this is a non-replayable fault, this field
|
||||
// contains the id of the channel that launched the
|
||||
// operation that caused the fault.
|
||||
//
|
||||
// TODO: Bug 3283289: this field is ambiguous for
|
||||
// Ampere+ GPUs, but it is never consumed by clients.
|
||||
};
|
||||
NvU16 clientId; // Id of the MMU client that triggered the fault. This
|
||||
// is the value provided by HW and is architecture-
|
||||
// specific. There are separate client ids for
|
||||
// different client types (See dev_fault.h).
|
||||
NvU64 address; // virtual address at which gpu faulted
|
||||
NvU64 timeStamp; // time stamp when the cpu started processing the
|
||||
// fault
|
||||
NvU64 timeStampGpu; // gpu time stamp when the fault entry was written
|
||||
// in the fault buffer
|
||||
NvU32 batchId; // Per-GPU unique id to identify the faults serviced
|
||||
// in batch before:
|
||||
// - Issuing a replay for replayable faults
|
||||
// - Re-scheduling the channel for non-replayable
|
||||
// faults.
|
||||
NvU8 clientType; // Volta+ GPUs can fault on clients other than GR.
|
||||
// UvmEventFaultClientTypeGpc indicates replayable
|
||||
// fault, while UvmEventFaultClientTypeHub indicates
|
||||
// non-replayable fault.
|
||||
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8Bits_2;
|
||||
NvU16 gpuIndex; // GPU that experienced the fault
|
||||
} UvmEventGpuFaultInfo_V2;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// This info is provided when a gpu fault is replayed (for replayable faults)
|
||||
@ -666,7 +800,25 @@ typedef struct
|
||||
// accesses is queued on the gpu
|
||||
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
|
||||
// executing on the gpu
|
||||
} UvmEventGpuFaultReplayInfo;
|
||||
} UvmEventGpuFaultReplayInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 clientType; // See clientType in UvmEventGpuFaultInfo
|
||||
NvU16 gpuIndex; // GPU that experienced the fault
|
||||
NvU32 batchId; // Per-GPU unique id to identify the faults that
|
||||
// have been serviced in batch
|
||||
NvU64 timeStamp; // cpu time when the replay of the faulting memory
|
||||
// accesses is queued on the gpu
|
||||
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
|
||||
// executing on the gpu
|
||||
} UvmEventGpuFaultReplayInfo_V2;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// This info is provided per fatal fault
|
||||
@ -689,7 +841,26 @@ typedef struct
|
||||
NvU16 padding16bits;
|
||||
NvU64 address; // virtual address at which the processor faulted
|
||||
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
|
||||
} UvmEventFatalFaultInfo;
|
||||
} UvmEventFatalFaultInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeFatalFault helps to identify event data
|
||||
// in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType. Only
|
||||
// valid if processorIndex is a GPU
|
||||
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
|
||||
NvU8 reason; // reason why the fault is fatal, refer
|
||||
// UvmEventFatalReason
|
||||
NvU16 processorIndex; // processor that experienced the fault
|
||||
NvU16 padding16bits;
|
||||
NvU64 address; // virtual address at which the processor faulted
|
||||
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
|
||||
} UvmEventFatalFaultInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -718,7 +889,38 @@ typedef struct
|
||||
// participate in read-duplicate this is time stamp
|
||||
// when all the operations have been pushed to all
|
||||
// the processors.
|
||||
} UvmEventReadDuplicateInfo;
|
||||
} UvmEventReadDuplicateInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeReadDuplicate helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits;
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // virtual address of the memory region that is
|
||||
// read-duplicated
|
||||
NvU64 size; // size in bytes of the memory region that is
|
||||
// read-duplicated
|
||||
NvU64 timeStamp; // cpu time stamp when the memory region becomes
|
||||
// read-duplicate. Since many processors can
|
||||
// participate in read-duplicate this is time stamp
|
||||
// when all the operations have been pushed to all
|
||||
// the processors.
|
||||
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
|
||||
// mask that specifies in which processors this
|
||||
// memory region is read-duplicated. This is last
|
||||
// so UVM_PROCESSOR_MASK_SIZE can grow.
|
||||
} UvmEventReadDuplicateInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -728,13 +930,13 @@ typedef struct
|
||||
// identify event data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
|
||||
// valid copy of the memory region
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
|
||||
// valid copy of the memory region
|
||||
NvU16 padding16bits;
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // virtual address of the memory region that is
|
||||
@ -746,8 +948,34 @@ typedef struct
|
||||
// participate in read-duplicate this is time stamp
|
||||
// when all the operations have been pushed to all
|
||||
// the processors.
|
||||
} UvmEventReadDuplicateInvalidateInfo;
|
||||
} UvmEventReadDuplicateInvalidateInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to
|
||||
// identify event data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 padding8bits;
|
||||
NvU16 residentIndex;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // virtual address of the memory region that is
|
||||
// read-duplicated
|
||||
NvU64 size; // size of the memory region that is
|
||||
// read-duplicated
|
||||
NvU64 timeStamp; // cpu time stamp when the memory region is no
|
||||
// longer read-duplicate. Since many processors can
|
||||
// participate in read-duplicate this is time stamp
|
||||
// when all the operations have been pushed to all
|
||||
// the processors.
|
||||
} UvmEventReadDuplicateInvalidateInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -770,7 +998,30 @@ typedef struct
|
||||
// changed
|
||||
NvU64 timeStamp; // cpu time stamp when the new page size is
|
||||
// queued on the gpu
|
||||
} UvmEventPageSizeChangeInfo;
|
||||
} UvmEventPageSizeChangeInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypePageSizeChange helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 processorIndex; // cpu/gpu processor index for which the page size
|
||||
// changed
|
||||
NvU32 size; // new page size
|
||||
NvU64 address; // virtual address of the page whose size has
|
||||
// changed
|
||||
NvU64 timeStamp; // cpu time stamp when the new page size is
|
||||
// queued on the gpu
|
||||
} UvmEventPageSizeChangeInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -794,7 +1045,33 @@ typedef struct
|
||||
// thrashing
|
||||
NvU64 size; // size of the memory region that is thrashing
|
||||
NvU64 timeStamp; // cpu time stamp when thrashing is detected
|
||||
} UvmEventThrashingDetectedInfo;
|
||||
} UvmEventThrashingDetectedInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeThrashingDetected helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits;
|
||||
NvU32 padding32bits;
|
||||
NvU64 address; // virtual address of the memory region that is
|
||||
// thrashing
|
||||
NvU64 size; // size of the memory region that is thrashing
|
||||
NvU64 timeStamp; // cpu time stamp when thrashing is detected
|
||||
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
|
||||
// mask that specifies which processors are
|
||||
// fighting for this memory region. This is last
|
||||
// so UVM_PROCESSOR_MASK_SIZE can grow.
|
||||
} UvmEventThrashingDetectedInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -815,7 +1092,28 @@ typedef struct
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu start time stamp for the throttling operation
|
||||
} UvmEventThrottlingStartInfo;
|
||||
} UvmEventThrottlingStartInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeThrottlingStart helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits[2];
|
||||
NvU16 processorIndex; // index of the cpu/gpu that was throttled
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu start time stamp for the throttling operation
|
||||
} UvmEventThrottlingStartInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -836,7 +1134,28 @@ typedef struct
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu end time stamp for the throttling operation
|
||||
} UvmEventThrottlingEndInfo;
|
||||
} UvmEventThrottlingEndInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeThrottlingEnd helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits[2];
|
||||
NvU16 processorIndex; // index of the cpu/gpu that was throttled
|
||||
NvU64 address; // address of the page whose servicing is being
|
||||
// throttled
|
||||
NvU64 timeStamp; // cpu end time stamp for the throttling operation
|
||||
} UvmEventThrottlingEndInfo_V2;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@ -892,7 +1211,36 @@ typedef struct
|
||||
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
|
||||
// the processor specified by srcIndex. If srcIndex
|
||||
// is a cpu, this field will be zero.
|
||||
} UvmEventMapRemoteInfo;
|
||||
} UvmEventMapRemoteInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeMapRemote helps to identify event data
|
||||
// in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells
|
||||
// the cause for the page to be mapped remotely
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU16 padding16bits;
|
||||
NvU16 srcIndex; // index of the cpu/gpu being remapped
|
||||
NvU16 dstIndex; // index of the cpu/gpu memory that contains the
|
||||
// memory region data
|
||||
NvU64 address; // virtual address of the memory region that is
|
||||
// thrashing
|
||||
NvU64 size; // size of the memory region that is thrashing
|
||||
NvU64 timeStamp; // cpu time stamp when all the required operations
|
||||
// have been pushed to the processor
|
||||
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
|
||||
// the processor specified by srcIndex. If srcIndex
|
||||
// is a cpu, this field will be zero.
|
||||
} UvmEventMapRemoteInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -918,7 +1266,33 @@ typedef struct
|
||||
NvU64 addressIn; // virtual address that caused the eviction
|
||||
NvU64 size; // size of the memory region that being evicted
|
||||
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
|
||||
} UvmEventEvictionInfo;
|
||||
} UvmEventEvictionInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeEviction helps to identify event data
|
||||
// in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits;
|
||||
NvU16 srcIndex; // index of the cpu/gpu from which data is being
|
||||
// evicted
|
||||
NvU16 dstIndex; // index of the cpu/gpu memory to which data is
|
||||
// going to be stored
|
||||
NvU64 addressOut; // virtual address of the memory region that is
|
||||
// being evicted
|
||||
NvU64 addressIn; // virtual address that caused the eviction
|
||||
NvU64 size; // size of the memory region that being evicted
|
||||
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
|
||||
} UvmEventEvictionInfo_V2;
|
||||
|
||||
// TODO: Bug 1870362: [uvm] Provide virtual address and processor index in
|
||||
// AccessCounter events
|
||||
@ -978,7 +1352,44 @@ typedef struct
|
||||
NvU32 bank;
|
||||
NvU64 address;
|
||||
NvU64 instancePtr;
|
||||
} UvmEventTestAccessCounterInfo;
|
||||
} UvmEventTestAccessCounterInfo_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
//
|
||||
// eventType has to be the 1st argument of this structure.
|
||||
// Setting eventType = UvmEventTypeAccessCounter helps to identify event
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
// See uvm_access_counter_buffer_entry_t for details
|
||||
NvU8 aperture;
|
||||
NvU8 instancePtrAperture;
|
||||
NvU8 isVirtual;
|
||||
NvU8 isFromCpu;
|
||||
NvU8 veId;
|
||||
|
||||
// The physical access counter notification was triggered on a managed
|
||||
// memory region. This is not set for virtual access counter notifications.
|
||||
NvU8 physOnManaged;
|
||||
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 srcIndex; // index of the gpu that received the access counter
|
||||
// notification
|
||||
NvU16 padding16bits;
|
||||
NvU32 value;
|
||||
NvU32 subGranularity;
|
||||
NvU32 tag;
|
||||
NvU32 bank;
|
||||
NvU32 padding32bits;
|
||||
NvU64 address;
|
||||
NvU64 instancePtr;
|
||||
} UvmEventTestAccessCounterInfo_V2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -998,30 +1409,64 @@ typedef struct
|
||||
NvU8 eventType;
|
||||
UvmEventMigrationInfo_Lite migration_Lite;
|
||||
|
||||
UvmEventCpuFaultInfo cpuFault;
|
||||
UvmEventMigrationInfo migration;
|
||||
UvmEventGpuFaultInfo gpuFault;
|
||||
UvmEventGpuFaultReplayInfo gpuFaultReplay;
|
||||
UvmEventFatalFaultInfo fatalFault;
|
||||
UvmEventReadDuplicateInfo readDuplicate;
|
||||
UvmEventReadDuplicateInvalidateInfo readDuplicateInvalidate;
|
||||
UvmEventPageSizeChangeInfo pageSizeChange;
|
||||
UvmEventThrashingDetectedInfo thrashing;
|
||||
UvmEventThrottlingStartInfo throttlingStart;
|
||||
UvmEventThrottlingEndInfo throttlingEnd;
|
||||
UvmEventMapRemoteInfo mapRemote;
|
||||
UvmEventEvictionInfo eviction;
|
||||
UvmEventCpuFaultInfo_V1 cpuFault;
|
||||
UvmEventMigrationInfo_V1 migration;
|
||||
UvmEventGpuFaultInfo_V1 gpuFault;
|
||||
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
|
||||
UvmEventFatalFaultInfo_V1 fatalFault;
|
||||
UvmEventReadDuplicateInfo_V1 readDuplicate;
|
||||
UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate;
|
||||
UvmEventPageSizeChangeInfo_V1 pageSizeChange;
|
||||
UvmEventThrashingDetectedInfo_V1 thrashing;
|
||||
UvmEventThrottlingStartInfo_V1 throttlingStart;
|
||||
UvmEventThrottlingEndInfo_V1 throttlingEnd;
|
||||
UvmEventMapRemoteInfo_V1 mapRemote;
|
||||
UvmEventEvictionInfo_V1 eviction;
|
||||
} eventData;
|
||||
|
||||
union
|
||||
{
|
||||
NvU8 eventType;
|
||||
|
||||
UvmEventTestAccessCounterInfo accessCounter;
|
||||
UvmEventTestAccessCounterInfo_V1 accessCounter;
|
||||
UvmEventTestSplitInvalidateInfo splitInvalidate;
|
||||
} testEventData;
|
||||
};
|
||||
} UvmEventEntry;
|
||||
} UvmEventEntry_V1;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
union
|
||||
{
|
||||
NvU8 eventType;
|
||||
UvmEventMigrationInfo_Lite migration_Lite;
|
||||
|
||||
UvmEventCpuFaultInfo_V2 cpuFault;
|
||||
UvmEventMigrationInfo_V2 migration;
|
||||
UvmEventGpuFaultInfo_V2 gpuFault;
|
||||
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
|
||||
UvmEventFatalFaultInfo_V2 fatalFault;
|
||||
UvmEventReadDuplicateInfo_V2 readDuplicate;
|
||||
UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate;
|
||||
UvmEventPageSizeChangeInfo_V2 pageSizeChange;
|
||||
UvmEventThrashingDetectedInfo_V2 thrashing;
|
||||
UvmEventThrottlingStartInfo_V2 throttlingStart;
|
||||
UvmEventThrottlingEndInfo_V2 throttlingEnd;
|
||||
UvmEventMapRemoteInfo_V2 mapRemote;
|
||||
UvmEventEvictionInfo_V2 eviction;
|
||||
} eventData;
|
||||
|
||||
union
|
||||
{
|
||||
NvU8 eventType;
|
||||
|
||||
UvmEventTestAccessCounterInfo_V2 accessCounter;
|
||||
UvmEventTestSplitInvalidateInfo splitInvalidate;
|
||||
} testEventData;
|
||||
};
|
||||
} UvmEventEntry_V2;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Type of time stamp used in the event entry:
|
||||
@ -1060,7 +1505,12 @@ typedef enum
|
||||
UvmDebugAccessTypeWrite = 1,
|
||||
} UvmDebugAccessType;
|
||||
|
||||
typedef struct UvmEventControlData_tag {
|
||||
typedef enum {
|
||||
UvmToolsEventQueueVersion_V1 = 1,
|
||||
UvmToolsEventQueueVersion_V2 = 2,
|
||||
} UvmToolsEventQueueVersion;
|
||||
|
||||
typedef struct UvmEventControlData_V1_tag {
|
||||
// entries between get_ahead and get_behind are currently being read
|
||||
volatile NvU32 get_ahead;
|
||||
volatile NvU32 get_behind;
|
||||
@ -1070,7 +1520,30 @@ typedef struct UvmEventControlData_tag {
|
||||
|
||||
// counter of dropped events
|
||||
NvU64 dropped[UvmEventNumTypesAll];
|
||||
} UvmToolsEventControlData;
|
||||
} UvmToolsEventControlData_V1;
|
||||
|
||||
typedef struct UvmEventControlData_V2_tag {
|
||||
// entries between get_ahead and get_behind are currently being read
|
||||
volatile NvU32 get_ahead;
|
||||
volatile NvU32 get_behind;
|
||||
|
||||
// entries between put_ahead and put_behind are currently being written
|
||||
volatile NvU32 put_ahead;
|
||||
volatile NvU32 put_behind;
|
||||
|
||||
// The version values are limited to UvmToolsEventQueueVersion and
|
||||
// initialized by UvmToolsCreateEventQueue().
|
||||
NvU32 version;
|
||||
NvU32 padding32Bits;
|
||||
|
||||
// counter of dropped events
|
||||
NvU64 dropped[UvmEventNumTypesAll];
|
||||
} UvmToolsEventControlData_V2;
|
||||
|
||||
// For backward compatibility:
|
||||
// TODO: Bug 4465348: remove these after replacing old references.
|
||||
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
|
||||
typedef UvmEventEntry_V1 UvmEventEntry;
|
||||
|
||||
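The V2 control block above is the shared header of the tools event ring buffer: the driver advances the put_* cursors as it publishes UvmEventEntry_V2 records, the client advances the get_* cursors as it consumes them, and dropped[] counts entries lost to overflow. The snippet below is only an illustrative consumer sketch under those assumptions; queue_buffer and queue_mask are hypothetical placeholders, and the authoritative cursor discipline is defined by the UVM tools implementation behind UvmToolsCreateEventQueue(), not by this header.

// Illustrative sketch only, not part of this header.
static bool example_read_one_event(UvmToolsEventControlData_V2 *ctrl,
                                   const UvmEventEntry_V2 *queue_buffer,
                                   NvU32 queue_mask,
                                   UvmEventEntry_V2 *out)
{
    NvU32 get = ctrl->get_ahead;

    // Nothing published yet: only entries before put_behind are complete.
    if (get == ctrl->put_behind)
        return false;

    ctrl->get_ahead = (get + 1) & queue_mask;  // claim the slot being read
    *out = queue_buffer[get & queue_mask];     // copy the entry out
    ctrl->get_behind = ctrl->get_ahead;        // mark the read as finished

    return true;
}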
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions
@ -706,11 +706,6 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
|
||||
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
|
||||
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);
|
||||
|
||||
// Return the preferred NUMA node ID for the block's policy.
|
||||
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
|
||||
// is returned.
|
||||
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);
|
||||
|
||||
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
|
||||
// and page masks could simplify the below APIs and their implementations
|
||||
// at the cost of having to scan the whole mask for small regions.
|
||||
@ -1546,7 +1541,11 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
|
||||
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
|
||||
//
|
||||
// LOCKING: The caller must hold the va_block lock
|
||||
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);
|
||||
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_mem_t *dst,
|
||||
NvU64 src,
|
||||
size_t size);
|
||||
|
||||
// Initialize va block retry tracking
|
||||
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
|
||||
@ -2090,11 +2089,14 @@ void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
|
||||
|
||||
// Count how many processors have a copy of the given page resident in their
|
||||
// memory.
|
||||
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get the processor with a resident copy of a page closest to the given
|
||||
// processor.
|
||||
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor);
|
||||
|
||||
@ -2127,6 +2129,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
|
||||
int nid,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Return the CPU chunk for the given page_index from the first available NUMA
|
||||
// node from the va_block. Should only be called for HMM va_blocks.
|
||||
// Locking: The va_block lock must be held.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_get_any_chunk_for_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
|
||||
|
||||
// Return the struct page * from the chunk corresponding to the given page_index
|
||||
// Locking: The va_block lock must be held.
|
||||
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
@ -2241,6 +2248,7 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
// Return the maximum mapping protection for processor_id that will not require
|
||||
// any permission revocation on the rest of processors.
|
||||
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
|
@ -175,6 +175,14 @@ typedef struct
|
||||
// Scratch node mask. This follows the same rules as scratch_page_mask;
|
||||
nodemask_t scratch_node_mask;
|
||||
|
||||
// Available as scratch space for the internal APIs. This is like a caller-
|
||||
// save register: it shouldn't be used across function calls which also take
|
||||
// this va_block_context.
|
||||
uvm_processor_mask_t scratch_processor_mask;
|
||||
|
||||
// Temporary mask in block_add_eviction_mappings().
|
||||
uvm_processor_mask_t map_processors_eviction;
|
||||
|
||||
// State used by uvm_va_block_make_resident
|
||||
struct uvm_make_resident_context_struct
|
||||
{
|
||||
@ -233,6 +241,16 @@ typedef struct
|
||||
// are removed as the operation progresses.
|
||||
uvm_page_mask_t revoke_running_page_mask;
|
||||
|
||||
// Mask used by block_gpu_split_2m and block_gpu_split_big to track
|
||||
// splitting of big PTEs but they are never called concurrently. This
|
||||
// mask can be used concurrently with other page masks.
|
||||
uvm_page_mask_t big_split_page_mask;
|
||||
|
||||
// Mask used by block_unmap_gpu to track non_uvm_lite_gpus which have
|
||||
// this block mapped. This mask can be used concurrently with other page
|
||||
// masks.
|
||||
uvm_processor_mask_t non_uvm_lite_gpus;
|
||||
|
||||
uvm_page_mask_t page_mask;
|
||||
uvm_page_mask_t filtered_page_mask;
|
||||
uvm_page_mask_t migratable_mask;
|
||||
@ -276,6 +294,10 @@ typedef struct
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
// Temporary mask used in uvm_hmm_block_add_eviction_mappings().
|
||||
uvm_processor_mask_t map_processors_eviction;
|
||||
|
||||
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
|
||||
struct migrate_vma migrate_vma_args;
|
||||
#endif
|
||||
|
@ -1799,7 +1799,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
|
||||
|
||||
if (uvm_api_range_invalid(params->base, params->length))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
if (params->gpuAttributesCount > UVM_MAX_GPUS)
|
||||
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -86,11 +86,13 @@ static void init_tools_data(uvm_va_space_t *va_space)
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.counters); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.counters + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v1); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues_v1 + i);
|
||||
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v2); i++)
|
||||
INIT_LIST_HEAD(va_space->tools.queues_v2 + i);
|
||||
}
|
||||
|
||||
static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
|
||||
static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_gpu_t *other_gpu;
|
||||
|
||||
@ -104,7 +106,7 @@ static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *
|
||||
|
||||
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
|
||||
|
||||
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1) {
|
||||
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1 || gpu->parent == other_gpu->parent) {
|
||||
NV_STATUS status = enable_peers(va_space, gpu, other_gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -324,10 +326,16 @@ static void unregister_gpu(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu->parent->isr.replayable_faults.handling)
|
||||
if (gpu->parent->isr.replayable_faults.handling) {
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
|
||||
|
||||
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
|
||||
uvm_processor_mask_clear(&va_space->non_faultable_processors, gpu->id);
|
||||
}
|
||||
|
||||
processor_mask_array_clear(va_space->can_access, gpu->id, gpu->id);
|
||||
processor_mask_array_clear(va_space->can_access, gpu->id, UVM_ID_CPU);
|
||||
@ -514,7 +522,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
|
||||
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
}
|
||||
|
||||
// Check that all CPU/GPU affinity masks are empty
|
||||
@ -604,7 +612,7 @@ uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProces
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid))
|
||||
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
|
||||
return gpu;
|
||||
}
|
||||
|
||||
@ -663,7 +671,8 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
NvBool *numa_enabled,
|
||||
NvS32 *numa_node_id)
|
||||
NvS32 *numa_node_id,
|
||||
NvProcessorUuid *uuid_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
@ -675,13 +684,15 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_uuid_copy(uuid_out, &gpu->uuid);
|
||||
|
||||
// Enabling access counters requires taking the ISR lock, so it is done
|
||||
// without holding the (deeper order) VA space lock. Enabling the counters
|
||||
// after dropping the VA space lock would create a window of time in which
|
||||
// another thread could see the GPU as registered, but access counters would
|
||||
// be disabled. Therefore, the counters are enabled before taking the VA
|
||||
// space lock.
|
||||
if (uvm_gpu_access_counters_required(gpu->parent)) {
|
||||
if (uvm_parent_gpu_access_counters_required(gpu->parent)) {
|
||||
status = uvm_gpu_access_counters_enable(gpu, va_space);
|
||||
if (status != NV_OK) {
|
||||
uvm_gpu_release(gpu);
|
||||
@ -726,10 +737,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
|
||||
|
||||
if (gpu->parent->isr.replayable_faults.handling) {
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
|
||||
uvm_processor_mask_set(&va_space->faultable_processors, gpu->id);
|
||||
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
|
||||
// System-wide atomics are enabled by default
|
||||
uvm_processor_mask_set(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(!uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
|
||||
uvm_processor_mask_set(&va_space->non_faultable_processors, gpu->id);
|
||||
}
|
||||
|
||||
// All GPUs have native atomics on their own memory
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
|
||||
@ -785,7 +803,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
status = register_gpu_nvlink_peers(va_space, gpu);
|
||||
status = register_gpu_peers(va_space, gpu);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
@ -822,9 +840,9 @@ done:
|
||||
if (status != NV_OK) {
|
||||
// There is no risk of disabling access counters on a previously
|
||||
// registered GPU: the enablement step would have failed before even
|
||||
// discovering that the GPU is already registed.
|
||||
if (uvm_gpu_access_counters_required(gpu->parent))
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
// discovering that the GPU is already registered.
|
||||
if (uvm_parent_gpu_access_counters_required(gpu->parent))
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
|
||||
uvm_gpu_release(gpu);
|
||||
}
|
||||
@ -876,15 +894,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
|
||||
// it from the VA space until we're done.
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
|
||||
// If uvm_gpu_access_counters_required(gpu->parent) is true, a concurrent
|
||||
// registration could enable access counters after they are disabled here.
|
||||
// If uvm_parent_gpu_access_counters_required(gpu->parent) is true, a
|
||||
// concurrent registration could enable access counters after they are
|
||||
// disabled here.
|
||||
// The concurrent registration will fail later on if it acquires the VA
|
||||
// space lock before the unregistration does (because the GPU is still
|
||||
// registered) and undo the access counters enablement, or succeed if it
|
||||
// acquires the VA space lock after the unregistration does. Both outcomes
|
||||
// result on valid states.
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
|
||||
// mmap_lock is needed to establish CPU mappings to any pages evicted from
|
||||
// the GPU if accessed by CPU is set for them.
|
||||
@ -1040,6 +1059,10 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
|
||||
processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
|
||||
}
|
||||
}
|
||||
else if (gpu0->parent == gpu1->parent) {
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu0->id, gpu1->id);
|
||||
processor_mask_array_set(va_space->has_native_atomics, gpu1->id, gpu0->id);
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_space_check_processors_masks(va_space));
|
||||
__set_bit(table_index, va_space->enabled_peers);
|
||||
@ -1091,6 +1114,7 @@ static NV_STATUS retain_pcie_peers_from_uuids(uvm_va_space_t *va_space,
|
||||
static bool uvm_va_space_pcie_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
return !processor_mask_array_test(va_space->has_nvlink, gpu0->id, gpu1->id) &&
|
||||
gpu0->parent != gpu1->parent &&
|
||||
uvm_va_space_peer_enabled(va_space, gpu0, gpu1);
|
||||
}
|
||||
|
||||
|
@ -163,6 +163,10 @@ struct uvm_va_space_struct
|
||||
// faults.
|
||||
uvm_processor_mask_t faultable_processors;
|
||||
|
||||
// Mask of processors registered with the va space that don't support
|
||||
// faulting.
|
||||
uvm_processor_mask_t non_faultable_processors;
|
||||
|
||||
// This is a count of non fault capable processors with a GPU VA space
|
||||
// registered.
|
||||
NvU32 num_non_faultable_gpu_va_spaces;
|
||||
@ -261,8 +265,8 @@ struct uvm_va_space_struct
|
||||
// Mask of processors that are participating in system-wide atomics
|
||||
uvm_processor_mask_t system_wide_atomics_enabled_processors;
|
||||
|
||||
// Mask of GPUs where access counters are enabled on this VA space
|
||||
uvm_processor_mask_t access_counters_enabled_processors;
|
||||
// Mask of physical GPUs where access counters are enabled on this VA space
|
||||
uvm_parent_processor_mask_t access_counters_enabled_processors;
|
||||
|
||||
// Array with information regarding CPU/GPU NUMA affinity. There is one
|
||||
// entry per CPU NUMA node. Entries in the array are populated sequentially
|
||||
@ -308,7 +312,8 @@ struct uvm_va_space_struct
|
||||
|
||||
// Lists of counters listening for events on this VA space
|
||||
struct list_head counters[UVM_TOTAL_COUNTERS];
|
||||
struct list_head queues[UvmEventNumTypesAll];
|
||||
struct list_head queues_v1[UvmEventNumTypesAll];
|
||||
struct list_head queues_v2[UvmEventNumTypesAll];
|
||||
|
||||
// Node for this va_space in global subscribers list
|
||||
struct list_head node;
|
||||
@ -399,7 +404,7 @@ static void uvm_va_space_processor_uuid(uvm_va_space_t *va_space, NvProcessorUui
|
||||
else {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
|
||||
UVM_ASSERT(gpu);
|
||||
memcpy(uuid, uvm_gpu_uuid(gpu), sizeof(*uuid));
|
||||
memcpy(uuid, &gpu->uuid, sizeof(*uuid));
|
||||
}
|
||||
}
|
||||
|
||||
@ -472,9 +477,9 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space);
|
||||
uvm_mutex_unlock(&(__va_space)->serialize_writers_lock); \
|
||||
} while (0)
|
||||
|
||||
// Get a registered gpu by uuid. This restricts the search for GPUs, to those that
|
||||
// have been registered with a va_space. This returns NULL if the GPU is not present, or not
|
||||
// registered with the va_space.
|
||||
// Get a registered gpu by uuid. This restricts the search for GPUs, to those
|
||||
// that have been registered with a va_space. This returns NULL if the GPU is
|
||||
// not present, or not registered with the va_space.
|
||||
//
|
||||
// LOCKING: The VA space lock must be held.
|
||||
uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
|
||||
@ -501,13 +506,19 @@ bool uvm_va_space_can_read_duplicate(uvm_va_space_t *va_space, uvm_gpu_t *changi
|
||||
// Register a gpu in the va space
|
||||
// Note that each gpu can be only registered once in a va space
|
||||
//
|
||||
// The input gpu_uuid is for the physical GPU. The user_rm_va_space argument
|
||||
// identifies the SMC partition if provided and SMC is enabled.
|
||||
//
|
||||
// This call returns whether the GPU memory is a NUMA node in the kernel and the
|
||||
// corresponding node id.
|
||||
// It also returns the GI UUID (if gpu_uuid is a SMC partition) or a copy of
|
||||
// gpu_uuid if the GPU is not SMC capable or SMC is not enabled.
|
||||
NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_va_space,
|
||||
NvBool *numa_enabled,
|
||||
NvS32 *numa_node_id);
|
||||
NvS32 *numa_node_id,
|
||||
NvProcessorUuid *uuid_out);
|
||||
|
||||
// Unregister a gpu from the va space
|
||||
NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
|
||||
|
@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
|
||||
}
|
||||
}
|
||||
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
|
||||
uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS()
|
||||
// Initialize MMU interval notifiers for this process. This allows
|
||||
// mmu_interval_notifier_insert() to be called without holding the
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -53,6 +53,7 @@
|
||||
(defined(CONFIG_CRYPTO_HMAC) || defined(CONFIG_CRYPTO_HMAC_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_ECDH) || defined(CONFIG_CRYPTO_ECDH_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_ECDSA) || defined(CONFIG_CRYPTO_ECDSA_MODULE)) && \
|
||||
(defined(CONFIG_CRYPTO_RSA) || defined(CONFIG_CRYPTO_RSA_MODULE)) && \
|
||||
(defined(CONFIG_X509_CERTIFICATE_PARSER) || defined(CONFIG_X509_CERTIFICATE_PARSER_MODULE))
|
||||
#define NV_CONFIG_CRYPTO_PRESENT 1
|
||||
#endif
|
||||
@ -151,4 +152,17 @@ bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public,
|
||||
bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size);
|
||||
|
||||
bool lkca_rsa_verify(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size);
|
||||
|
||||
bool lkca_rsa_pkcs1_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size);
|
||||
|
||||
bool lkca_rsa_pss_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size);
|
||||
|
||||
#endif
|
||||
|
611
kernel-open/nvidia/libspdm_rsa.c
Normal file
@ -0,0 +1,611 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "internal_crypt_lib.h"
|
||||
#include "library/cryptlib.h"
|
||||
|
||||
#ifdef USE_LKCA
|
||||
#include <linux/module.h>
|
||||
#include <linux/mpi.h>
|
||||
#include <linux/random.h>
|
||||
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/internal/rsa.h>
|
||||
|
||||
/* ------------------------ Macros & Defines ------------------------------- */
|
||||
#define GET_MOST_SIGNIFICANT_BIT(keySize) (keySize > 0 ? ((keySize - 1) & 7) : 0)
|
||||
#define GET_ENC_MESSAGE_SIZE_BYTE(keySize) (keySize + 7) >> 3;
|
||||
#define PKCS1_MGF1_COUNTER_SIZE_BYTE (4)
|
||||
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
|
||||
#define RSA_PSS_TRAILER_FIELD (0xbc)
|
||||
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
|
||||
#define BITS_TO_BYTES(b) (b >> 3)
|
||||
|
||||
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };
|
||||
|
||||
struct rsa_ctx
|
||||
{
|
||||
struct rsa_key key;
|
||||
bool pub_key_set;
|
||||
bool priv_key_set;
|
||||
int size;
|
||||
};
|
||||
#endif // #ifdef USE_LKCA
|
||||
|
||||
/*!
|
||||
* Creating and initializing an RSA context.
*
* @return : A void pointer to an RSA context
*
|
||||
*/
|
||||
void *libspdm_rsa_new
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return NULL;
|
||||
#else
|
||||
struct rsa_ctx *ctx;
|
||||
|
||||
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
|
||||
|
||||
if (ctx == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
||||
ctx->pub_key_set = false;
|
||||
ctx->priv_key_set = false;
|
||||
|
||||
return ctx;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* To free a RSA context.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
*
|
||||
*/
|
||||
void libspdm_rsa_free
|
||||
(
|
||||
void *rsa_context
|
||||
)
|
||||
{
|
||||
#ifdef USE_LKCA
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
|
||||
if (ctx != NULL)
|
||||
{
|
||||
if (ctx->key.n) kfree(ctx->key.n);
|
||||
if (ctx->key.e) kfree(ctx->key.e);
|
||||
if (ctx->key.d) kfree(ctx->key.d);
|
||||
if (ctx->key.q) kfree(ctx->key.q);
|
||||
if (ctx->key.p) kfree(ctx->key.p);
|
||||
if (ctx->key.dq) kfree(ctx->key.dq);
|
||||
if (ctx->key.dp) kfree(ctx->key.dp);
|
||||
if (ctx->key.qinv) kfree(ctx->key.qinv);
|
||||
kfree(ctx);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
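/*
 * Helper for libspdm_rsa_set_key() below: for RSA key component 'a' it frees
 * any previously stored buffer, takes ownership of the freshly allocated
 * shadow copy and records its size in the matching rsa_key field.
 */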
#define rsa_set_key_case(a, a_sz, A) \
|
||||
case A: \
|
||||
{ \
|
||||
if (ctx->key.a) { \
|
||||
kfree(ctx->key.a); \
|
||||
} \
|
||||
ctx->key.a = shadow_num; \
|
||||
ctx->key.a_sz = bn_size; \
|
||||
break; \
|
||||
}
|
||||
/*!
|
||||
* To set key into RSA context.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param key_tag : Indicates the key tag for the RSA key
* @param big_number : A big number buffer that stores the RSA key component
* @param bn_size : The size of the big number
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
bool libspdm_rsa_set_key
|
||||
(
|
||||
void *rsa_context,
|
||||
const libspdm_rsa_key_tag_t key_tag,
|
||||
const uint8_t *big_number,
|
||||
size_t bn_size
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
uint8_t *shadow_num;
|
||||
|
||||
if (ctx == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick sanity check if tag is valid
|
||||
switch (key_tag)
|
||||
{
|
||||
case LIBSPDM_RSA_KEY_N:
|
||||
case LIBSPDM_RSA_KEY_E:
|
||||
case LIBSPDM_RSA_KEY_D:
|
||||
case LIBSPDM_RSA_KEY_Q:
|
||||
case LIBSPDM_RSA_KEY_P:
|
||||
case LIBSPDM_RSA_KEY_DP:
|
||||
case LIBSPDM_RSA_KEY_DQ:
|
||||
case LIBSPDM_RSA_KEY_Q_INV:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (big_number != NULL)
|
||||
{
|
||||
shadow_num = kmalloc(bn_size, GFP_KERNEL);
|
||||
if (shadow_num == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
memcpy(shadow_num, big_number, bn_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
shadow_num = NULL;
|
||||
bn_size = 0;
|
||||
}
|
||||
|
||||
switch (key_tag)
|
||||
{
|
||||
rsa_set_key_case(n, n_sz, LIBSPDM_RSA_KEY_N)
|
||||
rsa_set_key_case(e, e_sz, LIBSPDM_RSA_KEY_E)
|
||||
rsa_set_key_case(d, d_sz, LIBSPDM_RSA_KEY_D)
|
||||
rsa_set_key_case(q, q_sz, LIBSPDM_RSA_KEY_Q)
|
||||
rsa_set_key_case(p, p_sz, LIBSPDM_RSA_KEY_P)
|
||||
rsa_set_key_case(dq, dq_sz, LIBSPDM_RSA_KEY_DQ)
|
||||
rsa_set_key_case(dp, dp_sz, LIBSPDM_RSA_KEY_DP)
|
||||
rsa_set_key_case(qinv, qinv_sz, LIBSPDM_RSA_KEY_Q_INV)
|
||||
default:
|
||||
// We can't get here ever
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* Perform PKCS1 MGF1 operation.
|
||||
*
|
||||
* @param mask : A mask pointer to store return data
|
||||
* @param maskedDB_length : Indicate mask data block length
|
||||
* @param seed : A seed pointer to store random values
|
||||
* @param seed_length : The seed length
|
||||
* @param hash_nid : The hash NID
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
static bool NV_PKCS1_MGF1
|
||||
(
|
||||
uint8_t *mask,
|
||||
size_t maskedDB_length,
|
||||
const uint8_t *seed,
|
||||
size_t seed_length,
|
||||
size_t hash_nid
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
size_t mdLength;
|
||||
size_t counter;
|
||||
size_t outLength;
|
||||
uint8_t counterBuf[4];
|
||||
void *sha384_ctx = NULL;
|
||||
uint8_t hash_value[LIBSPDM_SHA384_DIGEST_SIZE];
|
||||
bool status = false;
|
||||
|
||||
if (mask == NULL || seed == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only support SHA384 for MGF1 now.
|
||||
if (hash_nid == LIBSPDM_CRYPTO_NID_SHA384)
|
||||
{
|
||||
mdLength = LIBSPDM_SHA384_DIGEST_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
sha384_ctx = libspdm_sha384_new();
|
||||
|
||||
if (sha384_ctx == NULL)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_new() failed \n", __FUNCTION__);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (counter = 0, outLength = 0; outLength < maskedDB_length; counter++)
|
||||
{
|
||||
counterBuf[0] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 24);
|
||||
counterBuf[1] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 16);
|
||||
counterBuf[2] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 8);
|
||||
counterBuf[3] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 0);
|
||||
|
||||
status = libspdm_sha384_init(sha384_ctx);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_init() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, seed, seed_length);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() failed(seed) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, counterBuf, 4);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() failed(counterBuf) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (outLength + mdLength <= maskedDB_length)
|
||||
{
|
||||
status = libspdm_sha384_final(sha384_ctx, mask + outLength);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_final() failed (<= maskedDB_length) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
outLength += mdLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
status = libspdm_sha384_final(sha384_ctx, hash_value);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_final() failed(> maskedDB_length) !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
memcpy(mask + outLength, hash_value, maskedDB_length - outLength);
|
||||
outLength = maskedDB_length;
|
||||
}
|
||||
}
|
||||
status = true;
|
||||
|
||||
_error_exit:
|
||||
libspdm_sha384_free(sha384_ctx);
|
||||
return status;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
0xbc : Trailer Field
|
||||
+-----------+
|
||||
| M |
|
||||
+-----------+
|
||||
|
|
||||
V
|
||||
Hash
|
||||
|
|
||||
V
|
||||
+--------+----------+----------+
|
||||
M' = |Padding1| mHash | salt |
|
||||
+--------+----------+----------+
|
||||
|--------------|---------------|
|
||||
|
|
||||
+--------+----------+ V
|
||||
DB = |Padding2| salt | Hash
|
||||
+--------+----------+ |
|
||||
| |
|
||||
V |
|
||||
xor <--- MGF <---|
|
||||
| |
|
||||
| |
|
||||
V V
|
||||
+-------------------+----------+----+
|
||||
EM = | maskedDB | H |0xbc|
|
||||
+-------------------+----------+----+
|
||||
|
||||
salt : The random number, we hardcode its size as hash size here.
|
||||
M' : The concatenation of padding1 + message hash + salt
|
||||
MGF : Mask generation function.
|
||||
A mask generation function takes an octet string of variable length
|
||||
and a desired output length as input, and outputs an octet string of
|
||||
the desired length
|
||||
MGF1 is a Mask Generation Function based on a hash function.
|
||||
|
||||
Padding1 : 8 zeros
|
||||
Padding2 : 0x01
|
||||
|
||||
The detail spec is at https://datatracker.ietf.org/doc/html/rfc2437
|
||||
*/
|
||||
|
||||
/*!
|
||||
* Set keys and call PKCS1_MGF1 to generate signature.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param hash_nid : The hash NID
|
||||
* @param message_hash : The pointer to message hash
|
||||
* @param signature : The pointer is used to store generated signature
|
||||
* @param sig_size : On input, a pointer holding the signature buffer size.
* On output, a pointer holding the generated signature size.
* @param salt_length : The salt length for the RSA-PSS algorithm
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
static bool nvRsaPaddingAddPkcs1PssMgf1
|
||||
(
|
||||
void *rsa_context,
|
||||
size_t hash_nid,
|
||||
const uint8_t *message_hash,
|
||||
size_t hash_size,
|
||||
uint8_t *signature,
|
||||
size_t *sig_size,
|
||||
int salt_length
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
#else
|
||||
bool status = false;
|
||||
struct rsa_ctx *ctx = rsa_context;
|
||||
void *sha384_ctx = NULL;
|
||||
uint32_t keySize;
|
||||
uint32_t msBits;
|
||||
size_t emLength;
|
||||
uint8_t saltBuf[64];
|
||||
size_t maskedDB_length;
|
||||
size_t i;
|
||||
uint8_t *tmp_H;
|
||||
uint8_t *tmp_P;
|
||||
int rc;
|
||||
unsigned int ret_data_size;
|
||||
MPI mpi_n = NULL;
|
||||
MPI mpi_d = NULL;
|
||||
MPI mpi_c = mpi_alloc(0);
|
||||
MPI mpi_p = mpi_alloc(0);
|
||||
|
||||
// read modulus to BN struct
|
||||
mpi_n = mpi_read_raw_data(ctx->key.n, ctx->key.n_sz);
|
||||
if (mpi_n == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_n create failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// read private exponent to BN struct
|
||||
mpi_d = mpi_read_raw_data(ctx->key.d, ctx->key.d_sz);
|
||||
if (mpi_d == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_d create failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
keySize = mpi_n->nbits;
|
||||
msBits = GET_MOST_SIGNIFICANT_BIT(keySize);
|
||||
emLength = BITS_TO_BYTES(keySize);
|
||||
|
||||
if (msBits == 0)
|
||||
{
|
||||
*signature++ = 0;
|
||||
emLength--;
|
||||
}
|
||||
|
||||
if (emLength < hash_size + 2)
|
||||
{
|
||||
pr_err("%s : emLength < hash_size + 2 !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// Now, we only support salt_length == LIBSPDM_SHA384_DIGEST_SIZE
|
||||
if (salt_length != LIBSPDM_SHA384_DIGEST_SIZE ||
|
||||
hash_nid != LIBSPDM_CRYPTO_NID_SHA384)
|
||||
{
|
||||
pr_err("%s : Invalid salt_length (%x) \n", __FUNCTION__, salt_length);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
get_random_bytes(saltBuf, salt_length);
|
||||
|
||||
maskedDB_length = emLength - hash_size - 1;
|
||||
tmp_H = signature + maskedDB_length;
|
||||
sha384_ctx = libspdm_sha384_new();
|
||||
|
||||
if (sha384_ctx == NULL)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_new() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_init(sha384_ctx);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_init() failed !! \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, zeroes, sizeof(zeroes));
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_update() with zeros failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
status = libspdm_sha384_update(sha384_ctx, message_hash, hash_size);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s: libspdm_sha384_update() with message_hash failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (salt_length)
|
||||
{
|
||||
status = libspdm_sha384_update(sha384_ctx, saltBuf, salt_length);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_update() with saltBuf failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
}
|
||||
|
||||
status = libspdm_sha384_final(sha384_ctx, tmp_H);
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : libspdm_sha384_final() with tmp_H failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
/* Generate dbMask in place then perform XOR on it */
|
||||
status = NV_PKCS1_MGF1(signature, maskedDB_length, tmp_H, hash_size, hash_nid);
|
||||
|
||||
if (!status)
|
||||
{
|
||||
pr_err("%s : NV_PKCS1_MGF1() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
tmp_P = signature;
|
||||
tmp_P += emLength - salt_length - hash_size - 2;
|
||||
*tmp_P++ ^= 0x1;
|
||||
|
||||
if (salt_length > 0)
|
||||
{
|
||||
for (i = 0; i < salt_length; i++)
|
||||
{
|
||||
*tmp_P++ ^= saltBuf[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (msBits)
|
||||
{
|
||||
signature[0] &= 0xFF >> (8 - msBits);
|
||||
}
|
||||
|
||||
/* H is already in place so just set final 0xbc */
|
||||
signature[emLength - 1] = RSA_PSS_TRAILER_FIELD;
|
||||
|
||||
// read signature to BN struct
|
||||
mpi_p = mpi_read_raw_data(signature, emLength);
|
||||
if (mpi_p == NULL)
|
||||
{
|
||||
pr_err("%s : mpi_p() create failed !!\n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
// Starting RSA encryption with the private key over the signature.
|
||||
rc = mpi_powm(mpi_c, mpi_p, mpi_d, mpi_n);
|
||||
if (rc != 0)
|
||||
{
|
||||
pr_err("%s : mpi_powm() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
rc = mpi_read_buffer(mpi_c, signature, *sig_size, &ret_data_size, NULL);
|
||||
if (rc != 0)
|
||||
{
|
||||
pr_err("%s : mpi_read_buffer() failed \n", __FUNCTION__);
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
if (ret_data_size > *sig_size)
|
||||
{
|
||||
goto _error_exit;
|
||||
}
|
||||
|
||||
*sig_size = ret_data_size;
|
||||
status = true;
|
||||
|
||||
_error_exit:
|
||||
|
||||
mpi_free(mpi_n);
|
||||
mpi_free(mpi_d);
|
||||
mpi_free(mpi_c);
|
||||
mpi_free(mpi_p);
|
||||
|
||||
libspdm_sha384_free(sha384_ctx);
|
||||
|
||||
return status;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* Perform the RSA-PSS signature signing process with the LKCA library.
|
||||
*
|
||||
* @param rsa_context : A RSA context pointer
|
||||
* @param hash_nid : The hash NID
|
||||
* @param message_hash : The pointer to message hash
|
||||
* @param signature : The pointer is used to store generated signature
|
||||
* @param sig_size : On input, a pointer holding the signature buffer size.
* On output, a pointer holding the generated signature size.
|
||||
*
|
||||
* @Return : True if OK; otherwise return False
|
||||
*/
|
||||
bool lkca_rsa_pss_sign
|
||||
(
|
||||
void *rsa_context,
|
||||
size_t hash_nid,
|
||||
const uint8_t *message_hash,
|
||||
size_t hash_size,
|
||||
uint8_t *signature,
|
||||
size_t *sig_size
|
||||
)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return true;
|
||||
#else
|
||||
return nvRsaPaddingAddPkcs1PssMgf1(rsa_context,
|
||||
hash_nid,
|
||||
message_hash,
|
||||
hash_size,
|
||||
signature,
|
||||
sig_size,
|
||||
LIBSPDM_SHA384_DIGEST_SIZE);
|
||||
#endif
|
||||
}
|
||||
|
85
kernel-open/nvidia/libspdm_rsa_ext.c
Normal file
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
|
||||
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
|
||||
*/
|
||||
|
||||
/** @file
|
||||
* RSA Asymmetric Cipher Wrapper Implementation.
|
||||
*
|
||||
* This file implements following APIs which provide more capabilities for RSA:
|
||||
* 1) rsa_pss_sign
|
||||
*
|
||||
* RFC 8017 - PKCS #1: RSA Cryptography Specifications version 2.2
|
||||
**/
|
||||
|
||||
#include "internal_crypt_lib.h"
|
||||
#include "library/cryptlib.h"
|
||||
|
||||
/**
|
||||
* Carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme.
|
||||
*
|
||||
* This function carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme defined in
|
||||
* RSA PKCS#1 v2.2.
|
||||
*
|
||||
* The salt length is same as digest length.
|
||||
*
|
||||
* If the signature buffer is too small to hold the contents of signature, false
|
||||
* is returned and sig_size is set to the required buffer size to obtain the signature.
|
||||
*
|
||||
* If rsa_context is NULL, then return false.
|
||||
* If message_hash is NULL, then return false.
|
||||
* hash_size must match the hash_nid. nid could be SHA256, SHA384, SHA512, SHA3_256, SHA3_384, SHA3_512.
|
||||
* If sig_size is large enough but signature is NULL, then return false.
|
||||
*
|
||||
* @param[in] rsa_context Pointer to RSA context for signature generation.
|
||||
* @param[in] hash_nid hash NID
|
||||
* @param[in] message_hash Pointer to octet message hash to be signed.
|
||||
* @param[in] hash_size size of the message hash in bytes.
|
||||
* @param[out] signature Pointer to buffer to receive RSA-SSA PSS signature.
|
||||
* @param[in, out] sig_size On input, the size of signature buffer in bytes.
|
||||
* On output, the size of data returned in signature buffer in bytes.
|
||||
*
|
||||
* @retval true signature successfully generated in RSA-SSA PSS.
|
||||
* @retval false signature generation failed.
|
||||
* @retval false sig_size is too small.
|
||||
*
|
||||
**/
|
||||
bool libspdm_rsa_pss_sign(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
uint8_t *signature, size_t *sig_size)
|
||||
{
|
||||
return lkca_rsa_pss_sign(rsa_context, hash_nid, message_hash, hash_size,
|
||||
signature, sig_size);
|
||||
}
|
||||
//
|
||||
// In RM, we only need the signing process, so we stub the verification
// function. The verification function is needed in GSP code only.
|
||||
//
|
||||
bool libspdm_rsa_pss_verify(void *rsa_context, size_t hash_nid,
|
||||
const uint8_t *message_hash, size_t hash_size,
|
||||
const uint8_t *signature, size_t sig_size)
|
||||
{
|
||||
return false;
|
||||
}
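
Taken together, libspdm_rsa_new(), libspdm_rsa_set_key(), libspdm_rsa_pss_sign() and libspdm_rsa_free() form a small signing API. The snippet below is only a hedged usage sketch of the intended call sequence; the helper name and the n/e/d buffer parameters are illustrative, not symbols from this source.

/* Illustrative usage sketch only; not part of this file. */
static bool example_pss_sign(const uint8_t *n_buf, size_t n_len,
                             const uint8_t *e_buf, size_t e_len,
                             const uint8_t *d_buf, size_t d_len,
                             const uint8_t *digest,   /* SHA-384 digest */
                             uint8_t *sig, size_t *sig_len)
{
    bool ok;
    void *rsa = libspdm_rsa_new();

    if (rsa == NULL)
    {
        return false;
    }

    ok = libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_N, n_buf, n_len) &&
         libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_E, e_buf, e_len) &&
         libspdm_rsa_set_key(rsa, LIBSPDM_RSA_KEY_D, d_buf, d_len);

    if (ok)
    {
        /* The salt length is fixed to the SHA-384 digest size by lkca_rsa_pss_sign(). */
        ok = libspdm_rsa_pss_sign(rsa, LIBSPDM_CRYPTO_NID_SHA384,
                                  digest, LIBSPDM_SHA384_DIGEST_SIZE,
                                  sig, sig_len);
    }

    libspdm_rsa_free(rsa);
    return ok;
}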
|
||||
|
153
kernel-open/nvidia/nv-caps-imex.c
Normal file
@ -0,0 +1,153 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nv-linux.h"
|
||||
|
||||
extern int NVreg_ImexChannelCount;
|
||||
|
||||
static int nv_caps_imex_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_caps_imex_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct file_operations g_nv_caps_imex_fops =
|
||||
{
|
||||
.owner = THIS_MODULE,
|
||||
.open = nv_caps_imex_open,
|
||||
.release = nv_caps_imex_release
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
NvBool initialized;
|
||||
struct cdev cdev;
|
||||
dev_t devno;
|
||||
} g_nv_caps_imex;
|
||||
|
||||
int NV_API_CALL nv_caps_imex_channel_get(int fd)
|
||||
{
|
||||
#if NV_FILESYSTEM_ACCESS_AVAILABLE
|
||||
struct file *file;
|
||||
struct inode *inode;
|
||||
int channel = -1;
|
||||
|
||||
file = fget(fd);
|
||||
if (file == NULL)
|
||||
{
|
||||
return channel;
|
||||
}
|
||||
|
||||
inode = NV_FILE_INODE(file);
|
||||
if (inode == NULL)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Make sure the fd belongs to the nv-caps-imex-drv */
|
||||
if (file->f_op != &g_nv_caps_imex_fops)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* minor number is same as channel */
|
||||
channel = MINOR(inode->i_rdev);
|
||||
|
||||
out:
|
||||
fput(file);
|
||||
|
||||
return channel;
|
||||
#else
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
int NV_API_CALL nv_caps_imex_channel_count(void)
|
||||
{
|
||||
return NVreg_ImexChannelCount;
|
||||
}
|
||||
|
||||
int NV_API_CALL nv_caps_imex_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (g_nv_caps_imex.initialized)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex is already initialized.\n");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (NVreg_ImexChannelCount == 0)
|
||||
{
|
||||
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = alloc_chrdev_region(&g_nv_caps_imex.devno, 0,
|
||||
NVreg_ImexChannelCount,
|
||||
"nvidia-caps-imex-channels");
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to create cdev.\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
cdev_init(&g_nv_caps_imex.cdev, &g_nv_caps_imex_fops);
|
||||
|
||||
g_nv_caps_imex.cdev.owner = THIS_MODULE;
|
||||
|
||||
rc = cdev_add(&g_nv_caps_imex.cdev, g_nv_caps_imex.devno,
|
||||
NVreg_ImexChannelCount);
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to add cdev.\n");
|
||||
goto cdev_add_fail;
|
||||
}
|
||||
|
||||
g_nv_caps_imex.initialized = NV_TRUE;
|
||||
|
||||
return 0;
|
||||
|
||||
cdev_add_fail:
|
||||
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
void NV_API_CALL nv_caps_imex_exit(void)
|
||||
{
|
||||
if (!g_nv_caps_imex.initialized)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
cdev_del(&g_nv_caps_imex.cdev);
|
||||
|
||||
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
|
||||
|
||||
g_nv_caps_imex.initialized = NV_FALSE;
|
||||
}
|
34
kernel-open/nvidia/nv-caps-imex.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NV_CAPS_IMEX_H_
|
||||
#define _NV_CAPS_IMEX_H_
|
||||
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
int NV_API_CALL nv_caps_imex_init(void);
|
||||
void NV_API_CALL nv_caps_imex_exit(void);
|
||||
int NV_API_CALL nv_caps_imex_channel_get(int fd);
|
||||
int NV_API_CALL nv_caps_imex_channel_count(void);
|
||||
|
||||
#endif /* _NV_CAPS_IMEX_H_ */
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -577,12 +577,9 @@ int nvidia_mmap_helper(
|
||||
//
|
||||
// This path is similar to the sysmem mapping code.
|
||||
// TODO: Refactor is needed as part of bug#2001704.
|
||||
// Use pfn_valid to determine whether the physical address has
|
||||
// backing struct page. This is used to isolate P8 from P9.
|
||||
//
|
||||
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
|
||||
!IS_REG_OFFSET(nv, access_start, access_len) &&
|
||||
(pfn_valid(PFN_DOWN(mmap_start))))
|
||||
!IS_REG_OFFSET(nv, access_start, access_len))
|
||||
{
|
||||
ret = nvidia_mmap_numa(vma, mmap_context);
|
||||
if (ret)
|
||||
|
@ -839,6 +839,45 @@
|
||||
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
|
||||
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
|
||||
|
||||
/*
|
||||
* Option: NVreg_ImexChannelCount
|
||||
*
|
||||
* Description:
|
||||
*
|
||||
* This option allows users to specify the number of IMEX (import/export)
|
||||
* channels. Within an IMEX domain, the channels allow sharing memory
|
||||
* securely in a multi-user environment using the CUDA driver's fabric handle
|
||||
* based APIs.
|
||||
*
|
||||
* An IMEX domain is either an OS instance or a group of securely
|
||||
* connected OS instances using the NVIDIA IMEX daemon. The option must
|
||||
* be set to the same value on each OS instance within the IMEX domain.
|
||||
*
|
||||
* An IMEX channel is a logical entity that is represented by a /dev node.
|
||||
* The IMEX channels are global resources within the IMEX domain. When
|
||||
* exporter and importer CUDA processes have been granted access to the
|
||||
* same IMEX channel, they can securely share memory.
|
||||
*
|
||||
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
|
||||
* the related CUDA APIs will fail with an insufficient permission error until
|
||||
* the /dev nodes are set up. The creation of these /dev nodes,
|
||||
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
|
||||
* administrator, where N is the minor number. The major number can be
|
||||
* queried from /proc/devices.
|
||||
*
|
||||
* nvidia-modprobe CLI support is available to set up the /dev nodes.
|
||||
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
|
||||
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
|
||||
*
|
||||
* Possible values:
|
||||
* 0 - Disable IMEX using CUDA driver's fabric handles.
|
||||
* N - N IMEX channels will be enabled in the driver to facilitate N
|
||||
* concurrent users. Default value is 2048 channels, and the current
|
||||
* maximum value is 20-bit, same as Linux dev_t's minor number limit.
|
||||
*/
|
||||
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
|
||||
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
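
As a concrete illustration of the administrator-side setup described above, a user-space helper might look roughly like the sketch below. It assumes the /dev/nvidia-caps-imex-channels directory already exists; nvidia-modprobe provides equivalent functionality, so the code is illustrative only and not part of the driver.

/* User-space sketch, not driver code. */
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

static int example_make_imex_channel_node(int channel)
{
    char line[128], path[128];
    int major = -1;
    FILE *f = fopen("/proc/devices", "r");

    if (f == NULL)
        return -1;

    /* The char device region is registered as "nvidia-caps-imex-channels". */
    while (fgets(line, sizeof(line), f) != NULL)
    {
        if (strstr(line, "nvidia-caps-imex-channels") != NULL)
        {
            sscanf(line, "%d", &major);
            break;
        }
    }
    fclose(f);

    if (major < 0)
        return -1;

    /* The minor number is the IMEX channel number. */
    snprintf(path, sizeof(path), "/dev/nvidia-caps-imex-channels/channel%d", channel);
    return mknod(path, S_IFCHR | 0600, makedev(major, channel));
}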
|
||||
|
||||
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
|
||||
|
||||
/*
|
||||
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
|
||||
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
|
||||
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
|
||||
|
||||
/*
|
||||
*----------------registry database definition----------------------
|
||||
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -55,6 +55,7 @@
|
||||
#include "nv-kthread-q.h"
|
||||
#include "nv-pat.h"
|
||||
#include "nv-dmabuf.h"
|
||||
#include "nv-caps-imex.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
@ -825,11 +826,18 @@ static int __init nvidia_init_module(void)
|
||||
goto procfs_exit;
|
||||
}
|
||||
|
||||
rc = nv_caps_imex_init();
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
|
||||
goto caps_root_exit;
|
||||
}
|
||||
|
||||
rc = nv_module_init(&sp);
|
||||
if (rc < 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
|
||||
goto caps_root_exit;
|
||||
goto caps_imex_exit;
|
||||
}
|
||||
|
||||
count = nvos_count_devices();
|
||||
@ -941,6 +949,9 @@ drivers_exit:
|
||||
module_exit:
|
||||
nv_module_exit(sp);
|
||||
|
||||
caps_imex_exit:
|
||||
nv_caps_imex_exit();
|
||||
|
||||
caps_root_exit:
|
||||
nv_caps_root_exit();
|
||||
|
||||
@ -967,6 +978,8 @@ static void __exit nvidia_exit_module(void)
|
||||
|
||||
nv_module_exit(sp);
|
||||
|
||||
nv_caps_imex_exit();
|
||||
|
||||
nv_caps_root_exit();
|
||||
|
||||
nv_procfs_exit();
|
||||
@ -2040,7 +2053,7 @@ nvidia_close_callback(
|
||||
{
|
||||
nv_linux_state_t *nvl;
|
||||
nv_state_t *nv;
|
||||
nvidia_stack_t *sp;
|
||||
nvidia_stack_t *sp = nvlfp->sp;
|
||||
NvBool bRemove = NV_FALSE;
|
||||
|
||||
nvl = nvlfp->nvptr;
|
||||
@ -2052,12 +2065,11 @@ nvidia_close_callback(
|
||||
*/
|
||||
|
||||
nv_free_file_private(nvlfp);
|
||||
nv_kmem_cache_free_stack(nvlfp->sp);
|
||||
nv_kmem_cache_free_stack(sp);
|
||||
return;
|
||||
}
|
||||
|
||||
nv = NV_STATE_PTR(nvl);
|
||||
sp = nvlfp->sp;
|
||||
|
||||
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
|
||||
|
||||
@ -6050,6 +6062,131 @@ failed:
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
void NV_API_CALL nv_get_screen_info(
|
||||
nv_state_t *nv,
|
||||
NvU64 *pPhysicalAddress,
|
||||
NvU32 *pFbWidth,
|
||||
NvU32 *pFbHeight,
|
||||
NvU32 *pFbDepth,
|
||||
NvU32 *pFbPitch,
|
||||
NvU64 *pFbSize
|
||||
)
|
||||
{
|
||||
*pPhysicalAddress = 0;
|
||||
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
|
||||
|
||||
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
|
||||
if (num_registered_fb > 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_registered_fb; i++)
|
||||
{
|
||||
if (!registered_fb[i])
|
||||
continue;
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
|
||||
{
|
||||
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
|
||||
*pFbWidth = registered_fb[i]->var.xres;
|
||||
*pFbHeight = registered_fb[i]->var.yres;
|
||||
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
|
||||
*pFbPitch = registered_fb[i]->fix.line_length;
|
||||
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the screen info is not found in the registered FBs then fallback
|
||||
* to the screen_info structure.
|
||||
*
|
||||
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
|
||||
* generic framebuffers so the new generic system-framebuffer drivers can
|
||||
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
|
||||
* device created by SYSFB_SIMPLEFB.
|
||||
*
|
||||
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
|
||||
* information required by nv_get_screen_info(), therefore you need to
|
||||
* fall back onto the screen_info structure.
|
||||
*
|
||||
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
|
||||
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
|
||||
*/
|
||||
|
||||
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
|
||||
/*
|
||||
* If there is not a framebuffer console, return 0 size.
|
||||
*
|
||||
* orig_video_isVGA is set to 1 during early Linux kernel
|
||||
* initialization, and then will be set to a value, such as
|
||||
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
|
||||
*/
|
||||
if (screen_info.orig_video_isVGA > 1)
|
||||
{
|
||||
NvU64 physAddr = screen_info.lfb_base;
|
||||
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
|
||||
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
|
||||
#endif
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
|
||||
{
|
||||
*pPhysicalAddress = physAddr;
|
||||
*pFbWidth = screen_info.lfb_width;
|
||||
*pFbHeight = screen_info.lfb_height;
|
||||
*pFbDepth = screen_info.lfb_depth;
|
||||
*pFbPitch = screen_info.lfb_linelength;
|
||||
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
|
||||
}
|
||||
}
|
||||
#else
|
||||
{
|
||||
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
|
||||
struct pci_dev *pci_dev = nvl->pci_dev;
|
||||
int i;
|
||||
|
||||
if (pci_dev == NULL)
|
||||
return;
|
||||
|
||||
BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
|
||||
for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
|
||||
{
|
||||
int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
|
||||
struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
|
||||
struct resource *res = gpu_bar_res->child;
|
||||
|
||||
/*
|
||||
* Console resource will become child resource of pci-dev resource.
|
||||
* Check if child resource start address matches with expected
|
||||
* console start address.
|
||||
*/
|
||||
if ((res != NULL) &&
|
||||
NV_IS_CONSOLE_MAPPED(nv, res->start))
|
||||
{
|
||||
NvU32 res_name_len = strlen(res->name);
|
||||
|
||||
/*
|
||||
* The resource name ends with 'fb' (efifb, vesafb, etc.).
|
||||
* For simple-framebuffer, the resource name is 'BOOTFB'.
|
||||
* Confirm if the resources name either ends with 'fb' or 'FB'.
|
||||
*/
|
||||
if ((res_name_len > 2) &&
|
||||
!strcasecmp((res->name + res_name_len - 2), "fb"))
|
||||
{
|
||||
*pPhysicalAddress = res->start;
|
||||
*pFbSize = resource_size(res);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
module_init(nvidia_init_module);
|
||||
module_exit(nvidia_exit_module);
|
||||
|
@ -279,9 +279,11 @@ NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
|
||||
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
|
||||
NvBool bCopyAndFlush);
|
||||
|
||||
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo, NvBool bEnable);
|
||||
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
|
||||
NvBool bEnable);
|
||||
|
||||
// Interface used for CCSL
|
||||
|
||||
|
@ -985,24 +985,30 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
}
|
||||
EXPORT_SYMBOL(nvUvmInterfaceGetNonReplayableFaults);
|
||||
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device)
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
|
||||
NvBool bCopyAndFlush)
|
||||
{
|
||||
nvidia_stack_t *sp = nvUvmGetSafeStack();
|
||||
NV_STATUS status;
|
||||
|
||||
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, (gpuDeviceHandle)device);
|
||||
status = rm_gpu_ops_flush_replayable_fault_buffer(sp,
|
||||
pFaultInfo,
|
||||
bCopyAndFlush);
|
||||
|
||||
nvUvmFreeSafeStack(sp);
|
||||
return status;
|
||||
}
|
||||
EXPORT_SYMBOL(nvUvmInterfaceFlushReplayableFaultBuffer);
|
||||
|
||||
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable)
|
||||
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
NvBool bEnable)
|
||||
{
|
||||
nvidia_stack_t *sp = nvUvmGetSafeStack();
|
||||
NV_STATUS status;
|
||||
|
||||
status = rm_gpu_ops_toggle_prefetch_faults(sp, pFaultInfo, bEnable);
|
||||
status = rm_gpu_ops_toggle_prefetch_faults(sp,
|
||||
pFaultInfo,
|
||||
bEnable);
|
||||
|
||||
nvUvmFreeSafeStack(sp);
|
||||
return status;
|
||||
|
@ -30,18 +30,21 @@ NVIDIA_SOURCES += nvidia/nv-report-err.c
|
||||
NVIDIA_SOURCES += nvidia/nv-rsync.c
|
||||
NVIDIA_SOURCES += nvidia/nv-msi.c
|
||||
NVIDIA_SOURCES += nvidia/nv-caps.c
|
||||
NVIDIA_SOURCES += nvidia/nv-caps-imex.c
|
||||
NVIDIA_SOURCES += nvidia/nv_uvm_interface.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_aead.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_ecc.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_hkdf.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_rand.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_shash.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_rsa.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_sha.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_ec.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_x509.c
|
||||
NVIDIA_SOURCES += nvidia/libspdm_rsa_ext.c
|
||||
NVIDIA_SOURCES += nvidia/nvlink_linux.c
|
||||
NVIDIA_SOURCES += nvidia/nvlink_caps.c
|
||||
NVIDIA_SOURCES += nvidia/linux_nvswitch.c
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include "os-interface.h"
|
||||
#include "nv-linux.h"
|
||||
#include "nv-caps-imex.h"
|
||||
|
||||
#include "nv-time.h"
|
||||
|
||||
@ -59,6 +60,8 @@ NvBool os_dma_buf_enabled = NV_TRUE;
|
||||
NvBool os_dma_buf_enabled = NV_FALSE;
|
||||
#endif // CONFIG_DMA_SHARED_BUFFER
|
||||
|
||||
NvBool os_imex_channel_is_supported = NV_TRUE;
|
||||
|
||||
void NV_API_CALL os_disable_console_access(void)
|
||||
{
|
||||
console_lock();
|
||||
@ -1231,90 +1234,6 @@ NvBool NV_API_CALL os_is_efi_enabled(void)
|
||||
return efi_enabled(EFI_BOOT);
|
||||
}
|
||||
|
||||
void NV_API_CALL os_get_screen_info(
|
||||
NvU64 *pPhysicalAddress,
|
||||
NvU32 *pFbWidth,
|
||||
NvU32 *pFbHeight,
|
||||
NvU32 *pFbDepth,
|
||||
NvU32 *pFbPitch,
|
||||
NvU64 consoleBar1Address,
|
||||
NvU64 consoleBar2Address
|
||||
)
|
||||
{
|
||||
*pPhysicalAddress = 0;
|
||||
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
|
||||
|
||||
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
|
||||
if (num_registered_fb > 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_registered_fb; i++)
|
||||
{
|
||||
if (!registered_fb[i])
|
||||
continue;
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
|
||||
(registered_fb[i]->fix.smem_start == consoleBar2Address))
|
||||
{
|
||||
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
|
||||
*pFbWidth = registered_fb[i]->var.xres;
|
||||
*pFbHeight = registered_fb[i]->var.yres;
|
||||
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
|
||||
*pFbPitch = registered_fb[i]->fix.line_length;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If the screen info is not found in the registered FBs then fallback
|
||||
* to the screen_info structure.
|
||||
*
|
||||
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
|
||||
* generic framebuffers so the new generic system-framebuffer drivers can
|
||||
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
|
||||
* device created by SYSFB_SIMPLEFB.
|
||||
*
|
||||
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
|
||||
* information required by os_get_screen_info(), therefore you need to
|
||||
* fall back onto the screen_info structure.
|
||||
*
|
||||
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
|
||||
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
|
||||
*/
|
||||
|
||||
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
|
||||
/*
|
||||
* If there is not a framebuffer console, return 0 size.
|
||||
*
|
||||
* orig_video_isVGA is set to 1 during early Linux kernel
|
||||
* initialization, and then will be set to a value, such as
|
||||
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
|
||||
*/
|
||||
if (screen_info.orig_video_isVGA > 1)
|
||||
{
|
||||
NvU64 physAddr = screen_info.lfb_base;
|
||||
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
|
||||
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
|
||||
#endif
|
||||
|
||||
/* Make sure base address is mapped to GPU BAR */
|
||||
if ((physAddr == consoleBar1Address) ||
|
||||
(physAddr == consoleBar2Address))
|
||||
{
|
||||
*pPhysicalAddress = physAddr;
|
||||
*pFbWidth = screen_info.lfb_width;
|
||||
*pFbHeight = screen_info.lfb_height;
|
||||
*pFbDepth = screen_info.lfb_depth;
|
||||
*pFbPitch = screen_info.lfb_linelength;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void NV_API_CALL os_dump_stack(void)
|
||||
{
|
||||
dump_stack();
|
||||
@ -2182,6 +2101,22 @@ void NV_API_CALL os_nv_cap_close_fd
|
||||
nv_cap_close_fd(fd);
|
||||
}
|
||||
|
||||
NvS32 NV_API_CALL os_imex_channel_count
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
return nv_caps_imex_channel_count();
|
||||
}
|
||||
|
||||
NvS32 NV_API_CALL os_imex_channel_get
|
||||
(
|
||||
NvU64 descriptor
|
||||
)
|
||||
{
|
||||
return nv_caps_imex_channel_get((int)descriptor);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reads the total memory and free memory of a NUMA node from the kernel.
|
||||
*/
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -87,59 +87,10 @@ static NV_STATUS get_io_ptes(struct vm_area_struct *vma,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Pins user IO pages that have been mapped to the user processes virtual
|
||||
* address space with remap_pfn_range.
|
||||
*
|
||||
* @param[in] vma VMA that contains the virtual address range given by the
|
||||
* start and the page count.
|
||||
* @param[in] start Beginning of the virtual address range of the IO pages.
|
||||
* @param[in] page_count Number of pages to pin from start.
|
||||
* @param[in,out] page_array Storage array for pointers to the pinned pages.
|
||||
* Must be large enough to contain at least page_count
|
||||
* pointers.
|
||||
*
|
||||
* @return NV_OK if the pages were pinned successfully, error otherwise.
|
||||
*/
|
||||
static NV_STATUS get_io_pages(struct vm_area_struct *vma,
|
||||
NvUPtr start,
|
||||
NvU64 page_count,
|
||||
struct page **page_array)
|
||||
{
|
||||
NV_STATUS rmStatus = NV_OK;
|
||||
NvU64 i, pinned = 0;
|
||||
unsigned long pfn;
|
||||
|
||||
for (i = 0; i < page_count; i++)
|
||||
{
|
||||
if ((nv_follow_pfn(vma, (start + (i * PAGE_SIZE)), &pfn) < 0) ||
|
||||
(!pfn_valid(pfn)))
|
||||
{
|
||||
rmStatus = NV_ERR_INVALID_ADDRESS;
|
||||
break;
|
||||
}
|
||||
|
||||
// Page-backed memory mapped to userspace with remap_pfn_range
|
||||
page_array[i] = pfn_to_page(pfn);
|
||||
get_page(page_array[i]);
|
||||
pinned++;
|
||||
}
|
||||
|
||||
if (pinned < page_count)
|
||||
{
|
||||
for (i = 0; i < pinned; i++)
|
||||
put_page(page_array[i]);
|
||||
rmStatus = NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
return rmStatus;
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory(
|
||||
void *address,
|
||||
NvU64 page_count,
|
||||
NvU64 **pte_array,
|
||||
void **page_array
|
||||
NvU64 **pte_array
|
||||
)
|
||||
{
|
||||
NV_STATUS rmStatus;
|
||||
@ -187,18 +138,9 @@ NV_STATUS NV_API_CALL os_lookup_user_io_memory(
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (pfn_valid(pfn))
|
||||
{
|
||||
rmStatus = get_io_pages(vma, start, page_count, (struct page **)result_array);
|
||||
if (rmStatus == NV_OK)
|
||||
*page_array = (void *)result_array;
|
||||
}
|
||||
else
|
||||
{
|
||||
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
|
||||
if (rmStatus == NV_OK)
|
||||
*pte_array = (NvU64 *)result_array;
|
||||
}
|
||||
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
|
||||
if (rmStatus == NV_OK)
|
||||
*pte_array = (NvU64 *)result_array;
|
||||
|
||||
done:
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r551_06
|
||||
#define NV_BUILD_BRANCH r551_40
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r551_06
|
||||
#define NV_PUBLIC_BRANCH r551_40
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_06-132"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33773930)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_40-170"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33933991)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_06-132"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_40-170"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33933991)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r551_06-14"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33773930)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "551.23"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
|
||||
#define NV_BUILD_BRANCH_VERSION "r551_40-13"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33924744)
|
||||
#define NV_BUILD_TYPE "Nightly"
|
||||
#define NV_BUILD_NAME "r551_40-240221"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33921227)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R550
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -94,8 +94,9 @@ static inline void NvTimeSemFermiSetMaxSubmitted(
|
||||
NvTimeSemFermiSetMaxSubmittedVal(&report->timer, value);
|
||||
}
|
||||
|
||||
static inline NvU64 NvTimeSemFermiGetPayload(
|
||||
NvReportSemaphore32 *report)
|
||||
static inline NvU64 NvTimeSemFermiGetPayloadVal(
|
||||
volatile void *payloadPtr,
|
||||
volatile void *maxSubmittedPtr)
|
||||
{
|
||||
// The ordering of the two operations below is critical. Other threads
|
||||
// may be submitting GPU work that modifies the semaphore value, or
|
||||
@ -129,11 +130,11 @@ static inline NvU64 NvTimeSemFermiGetPayload(
|
||||
// adjust the max submitted value back down if a wrap occurs between these
|
||||
// two operations, but has no way to bump the max submitted value up if a
|
||||
// wrap occurs with the opposite ordering.
|
||||
NvU64 current = report->payload;
|
||||
NvU64 current = *(volatile NvU32*)payloadPtr;
|
||||
// Use an atomic exchange to ensure the 64-bit read is atomic even on 32-bit
|
||||
// CPUs.
|
||||
NvU64 submitted = (NvU64)
|
||||
__NVatomicCompareExchange64((volatile NvS64 *)&report->timer, 0ll, 0ll);
|
||||
__NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr, 0ll, 0ll);
|
||||
|
||||
nvAssert(!(current & 0xFFFFFFFF00000000ull));
|
||||
|
||||
@ -152,6 +153,12 @@ static inline NvU64 NvTimeSemFermiGetPayload(
|
||||
return current;
|
||||
}
|
||||
|
||||
static inline NvU64 NvTimeSemFermiGetPayload(
|
||||
NvReportSemaphore32 *report)
|
||||
{
|
||||
return NvTimeSemFermiGetPayloadVal(&report->payload, &report->timer);
|
||||
}
|
||||
|
||||
static inline void NvTimeSemFermiSetPayload(
|
||||
NvReportSemaphore32 *report,
|
||||
const NvU64 payload)
|
||||
@ -167,12 +174,19 @@ static inline void NvTimeSemFermiSetPayload(
|
||||
* Volta and up.
|
||||
*/
|
||||
|
||||
static inline NvU64 NvTimeSemVoltaGetPayloadVal(
|
||||
volatile void *payloadPtr)
|
||||
{
|
||||
nvAssert(payloadPtr);
|
||||
return (NvU64)
|
||||
__NVatomicCompareExchange64((volatile NvS64 *)payloadPtr,
|
||||
0, 0);
|
||||
}
|
||||
|
||||
static inline NvU64 NvTimeSemVoltaGetPayload(
|
||||
NvReportSemaphore64 *report)
|
||||
{
|
||||
return (NvU64)
|
||||
__NVatomicCompareExchange64((volatile NvS64 *)&report->reportValue,
|
||||
0, 0);
|
||||
return NvTimeSemVoltaGetPayloadVal(&report->reportValue);
|
||||
}
|
||||
|
||||
static inline void NvTimeSemVoltaSetPayload(
|
||||
|
@ -4,7 +4,7 @@
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
|
||||
|
||||
#define NV_VERSION_STRING "550.40.07"
|
||||
#define NV_VERSION_STRING "550.54.14"
|
||||
|
||||
#else
|
||||
|
||||
|
src/common/inc/swref/published/ampere/ga100/dev_ctxsw_prog.h (new file, 32 lines)
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GA100_DEV_CTXSW_PROG_H
|
||||
#define GA100_DEV_CTXSW_PROG_H
|
||||
|
||||
#define NV_CTXSW_TIMESTAMP_BUFFER_RD_WR_POINTER 30:0 /* */
|
||||
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE 31:31 /* */
|
||||
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_ENABLED 0x1 /* */
|
||||
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_DISABLED 0x0 /* */
|
||||
|
||||
#endif
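The new defines publish fields as high:low bit ranges; a hedged sketch of how such ranges might be consumed without the usual DRF helper macros (the helper names below are hypothetical):

/* Illustrative only: extract the fields published above from a raw
 * mailbox value. */
static inline unsigned int ctxsw_trace_feature_enabled(unsigned int mailbox1)
{
    return (mailbox1 >> 31) & 0x1;          /* bits 31:31 */
}

static inline unsigned int ctxsw_rd_wr_pointer(unsigned int mailbox1)
{
    return mailbox1 & 0x7fffffffU;          /* bits 30:0 */
}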
|
@ -123,9 +123,10 @@
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_FALSE 0x00000000 /* -WE-V */
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_TRUE 0x00000001 /* -W--V */
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_MAX_CACHELINE_SIZE 0x00000010 /* */
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
|
||||
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
|
||||
#define NV_VIRTUAL_FUNCTION_PRIV_MAILBOX_SCRATCH(i) (0x2100+(i)*4) /* RW-4A */
|
||||
#endif // __ga100_dev_vm_h__
|
||||
|
@ -991,7 +991,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
|
||||
pInfo->cc_white_y |= (p->Chromaticity[1] & NVT_PVT_EDID_CC_WHITE_Y1_Y0_MASK) >> NVT_PVT_EDID_CC_WHITE_Y1_Y0_SHIFT;
|
||||
|
||||
// copy established timings
|
||||
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
|
||||
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
|
||||
pInfo->established_timings_1_2 |= (NvU16)p->bEstablishedTimings2;
|
||||
|
||||
// copy manuf reserved timings
|
||||
@ -1039,7 +1039,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
|
||||
p861Info = (k == 0) ? &pInfo->ext861 : &pInfo->ext861_2;
|
||||
|
||||
get861ExtInfo(pExt, sizeof(EDIDV1STRUC), p861Info);
|
||||
|
||||
|
||||
// HF EEODB is present in EDID v1.3; v1.4 does not need this. Also, it is always present in the 1st CTA extension block.
|
||||
if (j == 1 && pInfo->version == NVT_EDID_VER_1_3)
|
||||
{
|
||||
@ -1106,11 +1106,6 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
|
||||
pInfo->ext_displayid20.interface_features.yuv420_min_pclk = 0;
|
||||
}
|
||||
|
||||
if (pInfo->ext861.revision == 0 && pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
|
||||
{
|
||||
pInfo->ext861.revision = NVT_CEA861_REV_B;
|
||||
}
|
||||
|
||||
if (pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
|
||||
{
|
||||
pInfo->ext861.basic_caps |= pInfo->ext_displayid20.basic_caps;
|
||||
@ -1157,7 +1152,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
|
||||
}
|
||||
}
|
||||
|
||||
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
|
||||
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
|
||||
for (i = 0; i < pInfo->ext_displayid20.total_timings; i++)
|
||||
{
|
||||
if (!assignNextAvailableTiming(pInfo, &(pInfo->ext_displayid20.timing[i])))
|
||||
@ -1215,7 +1210,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
|
||||
|
||||
CODE_SEGMENT(PAGE_DD_CODE)
|
||||
void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
|
||||
{
|
||||
{
|
||||
NvU32 i, j, data;
|
||||
|
||||
for (i = 0; i < pInfo->total_timings; i++)
|
||||
@ -1226,8 +1221,8 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
|
||||
case NVT_TYPE_HDMI_STEREO:
|
||||
case NVT_TYPE_HDMI_EXT:
|
||||
// VTB timing use the base EDID (block 0) to determine the color format support
|
||||
case NVT_TYPE_EDID_VTB_EXT:
|
||||
case NVT_TYPE_EDID_VTB_EXT_STD:
|
||||
case NVT_TYPE_EDID_VTB_EXT:
|
||||
case NVT_TYPE_EDID_VTB_EXT_STD:
|
||||
case NVT_TYPE_EDID_VTB_EXT_DTD:
|
||||
case NVT_TYPE_EDID_VTB_EXT_CVT:
|
||||
// pInfo->u.feature_ver_1_3.color_type provides mono, rgb, rgy, undefined
|
||||
@ -1245,7 +1240,7 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
|
||||
}
|
||||
updateBpcForTiming(pInfo, i);
|
||||
break;
|
||||
default:
|
||||
default:
|
||||
// * the displayID_v1.3/v2.0 EDID extension need to follow the EDID bpc definition.
|
||||
// * all other default to base edid
|
||||
updateBpcForTiming(pInfo, i);
|
||||
@ -1319,7 +1314,7 @@ NvBool isMatchedStandardTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
|
||||
|
||||
for (j = 0; j < pInfo->total_timings; j++)
|
||||
{
|
||||
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
|
||||
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
|
||||
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
|
||||
{
|
||||
return NV_TRUE;
|
||||
@ -1335,7 +1330,7 @@ NvBool isMatchedEstablishedTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
|
||||
|
||||
for (j = 0; j < pInfo->total_timings; j++)
|
||||
{
|
||||
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
|
||||
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
|
||||
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
|
||||
{
|
||||
return NV_TRUE;
|
||||
@ -1405,7 +1400,7 @@ void updateBpcForTiming(NVT_EDID_INFO *pInfo, NvU32 index)
|
||||
}
|
||||
}
|
||||
else if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
|
||||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
|
||||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
|
||||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_UNDEFINED) &&
|
||||
p861Info->revision >= NVT_CEA861_REV_A)
|
||||
{
|
||||
@ -1462,7 +1457,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
|
||||
|
||||
// the timing mapping index :
|
||||
//
|
||||
// native_cta - the "native resolution of the sink" in CTA-861.6. A Source shall override any other native video resolution indicators
|
||||
// native_cta - the "native resolution of the sink" in CTA-861.6. A Source shall override any other native video resolution indicators
|
||||
// if the Source supports NVRDB and the NVRDB was found in the E-EDID
|
||||
// preferred_cta - the "prefer SVD" in CTA-861-F (i.e. A Sink that prefers a Video Format that is not listed as an SVD in Video Data Block, but instead listed in YCBCR 4:2:0 VDB)
|
||||
// preferred_displayid_dtd - the "prefer detailed timing of DisplayID" extension
|
||||
@ -1546,7 +1541,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
|
||||
if (native_cta == pEdidInfo->total_timings && NVT_NATIVE_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
|
||||
{
|
||||
native_cta = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (preferred_cta == pEdidInfo->total_timings && NVT_PREFERRED_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
|
||||
{
|
||||
@ -2063,10 +2058,10 @@ NVT_STATUS NvTiming_GetEDIDBasedASPRTiming( NvU16 width, NvU16 height, NvU16 rr,
|
||||
*
|
||||
* @brief check whether the EDID raw data is valid, and return the error flags if any exist
|
||||
* @param pEdid : this is a pointer to EDID data
|
||||
* @param length : read length of EDID
|
||||
* @param length : read length of EDID
|
||||
* @param bIsStrongValidation : true - adds more checks
|
||||
*                             false - only header, checksum, and size checks
|
||||
*
|
||||
*
|
||||
*/
|
||||
CODE_SEGMENT(PAGE_DD_CODE)
|
||||
NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongValidation)
|
||||
@ -2086,12 +2081,12 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
|
||||
return ret;
|
||||
}
|
||||
|
||||
// check the EDID version and signature
|
||||
// check the EDID version and signature
|
||||
if (getEdidVersion(pEdid, &version) != NVT_STATUS_SUCCESS)
|
||||
{
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
// check block 0 checksum value
|
||||
if (!isChecksumValid(pEdid))
|
||||
@ -2239,11 +2234,11 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief sanity check whether the frequently used data blocks in the EDID binary are valid,
|
||||
* @brief sanity check whether the frequently used data blocks in the EDID binary are valid,
|
||||
*        and return the error checkpoint flags if any exist
|
||||
* @param pEdid : this is a pointer to EDID raw data
|
||||
* @param length : read length of EDID
|
||||
*
|
||||
*
|
||||
*/
|
||||
CODE_SEGMENT(PAGE_DD_CODE)
|
||||
NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
@ -2255,7 +2250,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
DETAILEDTIMINGDESCRIPTOR *pDTD;
|
||||
// For CTA861
|
||||
NvU8 ctaDTD_Offset;
|
||||
NvU8 *pData_collection;
|
||||
NvU8 *pData_collection;
|
||||
NvU32 ctaBlockTag, ctaPayload, vic;
|
||||
// For DisplayID
|
||||
DIDEXTENSION *pDisplayid;
|
||||
@ -2283,7 +2278,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
{
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
|
||||
}
|
||||
|
||||
|
||||
// 18bytes in DTD or Display Descriptor check
|
||||
for (i = 0; i < NVT_EDID_MAX_LONG_DISPLAY_DESCRIPTOR; i++)
|
||||
{
|
||||
@ -2313,7 +2308,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
pLdd = (EDID_LONG_DISPLAY_DESCRIPTOR *)&p->DetailedTimingDesc[i];
|
||||
|
||||
// This block is a display descriptor, validate
|
||||
@ -2327,7 +2322,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
NvU8 max_v_rate_offset, min_v_rate_offset, max_h_rate_offset, min_h_rate_offset;
|
||||
|
||||
// add 255Hz offsets as needed before doing the check, use descriptor->rsvd2
|
||||
nvt_assert(!(pLdd->rsvd2 & 0xF0));
|
||||
nvt_assert(!(pLdd->rsvd2 & 0xF0));
|
||||
|
||||
max_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MAX ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
|
||||
min_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MIN ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
|
||||
@ -2340,19 +2335,19 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
pRangeLimit->maxHRate == 0)
|
||||
{
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_RANGE_LIMIT);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extension and size check
|
||||
if ((NvU32)(p->bExtensionFlag + 1) * sizeof(EDIDV1STRUC) > length)
|
||||
{
|
||||
{
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT);
|
||||
}
|
||||
|
||||
// we shall not trust any extension blocks with wrong input EDID size
|
||||
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
|
||||
// we shall not trust any extension blocks with wrong input EDID size
|
||||
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
|
||||
NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT))
|
||||
return ret;
|
||||
|
||||
@ -2384,7 +2379,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
// validate SVD block
|
||||
ctaBlockTag = NVT_CEA861_GET_SHORT_DESCRIPTOR_TAG(((EIA861EXTENSION *)pExt)->data[0]);
|
||||
pData_collection = ((EIA861EXTENSION *)pExt)->data;
|
||||
|
||||
|
||||
while ((ctaDTD_Offset - 4) > 0 && pData_collection != &pExt[ctaDTD_Offset] &&
|
||||
ctaBlockTag > NVT_CEA861_TAG_RSVD && ctaBlockTag <= NVT_CEA861_TAG_EXTENDED_FLAG)
|
||||
{
|
||||
@ -2451,7 +2446,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DTD);
|
||||
else
|
||||
{
|
||||
// check the max image size and
|
||||
// check the max image size and
|
||||
if (p->bMaxHorizImageSize != 0 && p->bMaxVertImageSize != 0)
|
||||
{
|
||||
NvU16 hDTDImageSize = (pDTD->bDTHorizVertImage & 0xF0) << 4 | pDTD->bDTHorizontalImage;
|
||||
@ -2466,7 +2461,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
|
||||
if(!isChecksumValid(pExt))
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_CTA_CHECKSUM);
|
||||
break;
|
||||
break;
|
||||
case NVT_EDID_EXTENSION_DISPLAYID:
|
||||
pDisplayid = ((DIDEXTENSION *)pExt);
|
||||
if (pDisplayid->ext_count != 0)
|
||||
@ -2483,10 +2478,10 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
{
|
||||
if ((pDisplayid->struct_version & 0xFF) == 0x21)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID_VERSION);
|
||||
|
||||
|
||||
did2ExtCount++;
|
||||
|
||||
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
|
||||
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_USE_CASE);
|
||||
|
||||
// check the DisplayId2 valid timing
|
||||
@ -2506,7 +2501,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TYPE7);
|
||||
|
||||
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_RANGE_LIMITS)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
|
||||
|
||||
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_ADAPTIVE_SYNC)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_ADAPTIVE_SYNC);
|
||||
@ -2527,9 +2522,9 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
}
|
||||
|
||||
// if the first tag failed, ignore all the tags afterward then
|
||||
if (!bAllZero &&
|
||||
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
|
||||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
|
||||
if (!bAllZero &&
|
||||
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
|
||||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
|
||||
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_VENDOR_SPEC &&
|
||||
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_CTA_DATA)) &&
|
||||
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
|
||||
@ -2537,7 +2532,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TAG);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((pDisplayid->struct_version & 0xFF) == 0x12 || (pDisplayid->struct_version & 0xFF) == 0x13)
|
||||
{
|
||||
if ((pDisplayid->struct_version & 0xFF) == 0x13)
|
||||
@ -2559,7 +2554,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
|
||||
if (pHeader->type == NVT_DISPLAYID_BLOCK_TYPE_RANGE_LIMITS)
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
|
||||
|
||||
|
||||
// add more data blocks tag here to evaluate
|
||||
}
|
||||
pData_collection += block_length;
|
||||
@ -2580,7 +2575,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
|
||||
if (!bAllZero &&
|
||||
pHeader->type > NVT_DISPLAYID_BLOCK_TYPE_TILEDDISPLAY &&
|
||||
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_CTA_DATA &&
|
||||
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
|
||||
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
|
||||
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
|
||||
{
|
||||
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID13_TAG);
|
||||
@ -2939,7 +2934,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
|
||||
|
||||
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
|
||||
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);
|
||||
|
||||
|
||||
// Wipe out the checksums
|
||||
CommonEDIDBuffer[CommonEDIDBuffer[1]+5/*mandatory bytes*/-1] = 0;
|
||||
CommonEDIDBuffer[0xFF] = 0;
|
||||
@ -2954,7 +2949,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
|
||||
// displayId2 standalone uses 256 length sections
|
||||
commonEDIDBufferSize = 256;
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
|
||||
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);
|
||||
|
@ -111,7 +111,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
|
||||
nvt_assert((timingIdx) <= COUNT(pInfo->timing));
|
||||
|
||||
if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
|
||||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
|
||||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
|
||||
pInfo->ext861.valid.H14B_VSDB || pInfo->ext861.valid.H20_HF_VSDB) && pInfo->ext861.revision >= NVT_CEA861_REV_A)
|
||||
{
|
||||
if (!pInfo->ext_displayid.supported_displayId2_0)
|
||||
@ -153,7 +153,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
|
||||
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_10b,
|
||||
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_12b,
|
||||
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_14b,
|
||||
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
|
||||
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
|
||||
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_14b,
|
||||
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_16b);
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// yuv444
|
||||
UPDATE_BPC_FOR_COLORFORMAT(pT->etc.yuv444, 0, /* yuv444 does not support 6bpc */
|
||||
@ -264,7 +264,7 @@ static NVT_STATUS parseDisplayIdSection(DISPLAYID_SECTION * section,
|
||||
* @brief Parses a displayID data block
|
||||
* @param block The DisplayID data block to parse
|
||||
* @param max_length The indicated total length of the each data block for checking
|
||||
* @param pLength return the indicated length of each data block
|
||||
* @param pLength return the indicated length of each data block
|
||||
* @param pEdidInfo EDID struct containing DisplayID information and
|
||||
* the timings or validation purpose if it is NULL
|
||||
*/
|
||||
@ -285,7 +285,7 @@ NVT_STATUS parseDisplayIdBlock(NvU8* pBlock,
|
||||
return NVT_STATUS_ERR;
|
||||
|
||||
pInfo = pEdidInfo == NULL ? NULL : &pEdidInfo->ext_displayid;
|
||||
|
||||
|
||||
*pLength = hdr->data_bytes + NVT_DISPLAYID_DATABLOCK_HEADER_LEN;
|
||||
|
||||
switch (hdr->type)
|
||||
@ -386,9 +386,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
|
||||
|
||||
for (i = 0; i < prim_num; i++)
|
||||
{
|
||||
x_p = (blk->points)[i].color_x_bits_low +
|
||||
x_p = (blk->points)[i].color_x_bits_low +
|
||||
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
|
||||
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
|
||||
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
|
||||
((blk->points)[i].color_y_bits_high << 4);
|
||||
pInfo->primaries[i].x = x_p;
|
||||
pInfo->primaries[i].y = y_p;
|
||||
@ -396,9 +396,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
|
||||
|
||||
for (j = 0; j < wp_num; j++)
|
||||
{
|
||||
x_p = (blk->points)[i].color_x_bits_low +
|
||||
x_p = (blk->points)[i].color_x_bits_low +
|
||||
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
|
||||
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
|
||||
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
|
||||
((blk->points)[i].color_y_bits_high << 4);
|
||||
pInfo->white_points[pInfo->total_primaries + j].x = x_p;
|
||||
pInfo->white_points[pInfo->total_primaries + j].y = y_p;
|
||||
@ -508,7 +508,6 @@ static NVT_STATUS parseDisplayIdTiming1(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
|
||||
CODE_SEGMENT(PAGE_DD_CODE)
|
||||
static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR * type1, NVT_TIMING *pT)
|
||||
{
|
||||
NvU32 totalPixels_in_2_fields;
|
||||
if (type1 == NULL || pT == NULL)
|
||||
return NVT_STATUS_ERR;
|
||||
|
||||
@ -569,30 +568,17 @@ static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR
|
||||
}
|
||||
|
||||
// the refresh rate
|
||||
if (pT->interlaced)
|
||||
{
|
||||
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
|
||||
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
|
||||
// calculate the field rate in interlaced mode
|
||||
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
|
||||
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
|
||||
}
|
||||
else
|
||||
{
|
||||
// calculate frame rate in progressive mode
|
||||
// in progressive mode filed = frame
|
||||
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
|
||||
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
|
||||
}
|
||||
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
|
||||
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
|
||||
pT->etc.name[39] = '\0';
|
||||
pT->etc.rep = 0x1; // bit mask for no pixel repetition
|
||||
|
||||
|
||||
pT->etc.status = NVT_STATUS_DISPLAYID_1;
|
||||
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
|
||||
// so we'll just update the preferred flag, not sequence them
|
||||
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
|
||||
pT->etc.flag |= type1->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
|
||||
|
||||
|
||||
/* Fields currently not used. Uncomment them for future use
|
||||
type1->options.stereo_support;
|
||||
*/
|
||||
@ -651,7 +637,6 @@ static NVT_STATUS parseDisplayIdTiming2(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
|
||||
CODE_SEGMENT(PAGE_DD_CODE)
|
||||
static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR * type2, NVT_TIMING *pT)
|
||||
{
|
||||
NvU32 totalPixels_in_2_fields;
|
||||
if (type2 == NULL || pT == NULL)
|
||||
return NVT_STATUS_ERR;
|
||||
|
||||
@ -679,32 +664,19 @@ static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR
|
||||
pT->interlaced = type2->options.interface_frame_scanning_type;
|
||||
|
||||
// the refresh rate
|
||||
if (pT->interlaced)
|
||||
{
|
||||
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
|
||||
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
|
||||
// calculate the field rate in interlaced mode
|
||||
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
|
||||
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
|
||||
}
|
||||
else
|
||||
{
|
||||
// calculate frame rate in progressive mode
|
||||
// in progressive mode filed = frame
|
||||
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
|
||||
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
|
||||
}
|
||||
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
|
||||
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
|
||||
|
||||
pT->etc.aspect = 0;
|
||||
pT->etc.name[39] = '\0';
|
||||
pT->etc.rep = 0x1; // Bit mask for no pixel repetition
|
||||
|
||||
|
||||
pT->etc.status = NVT_STATUS_DISPLAYID_2;
|
||||
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
|
||||
// so we'll just update the preferred flag, not sequence them
|
||||
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
|
||||
pT->etc.flag |= type2->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
|
||||
|
||||
|
||||
/* Fields currently not used. Uncomment them for future use
|
||||
type1->options.stereo_support;
|
||||
*/
|
||||
@ -861,12 +833,12 @@ static NVT_STATUS parseDisplayIdTiming5Descriptor(DISPLAYID_TIMING_5_DESCRIPTOR
|
||||
{
|
||||
NvU32 width, height, rr;
|
||||
NvBool is1000div1001 = NV_FALSE;
|
||||
|
||||
|
||||
// we don't handle stereo type nor custom reduced blanking yet
|
||||
//NvU8 stereoType, formula;
|
||||
//stereoType = (desc->optns & NVT_DISPLAYID_TIMING_5_STEREO_SUPPORT_MASK);
|
||||
//formula = desc->optns & NVT_DISPLAYID_TIMING_5_FORMULA_SUPPORT_MASK;
|
||||
|
||||
|
||||
if (desc->optns & NVT_DISPLAYID_TIMING_5_FRACTIONAL_RR_SUPPORT_MASK)
|
||||
{
|
||||
is1000div1001 = NV_TRUE;
|
||||
@ -892,7 +864,7 @@ static NVT_STATUS parseDisplayIdTiming5(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
|
||||
for (i = 0; i * sizeof(DISPLAYID_TIMING_5_DESCRIPTOR) < blk->header.data_bytes; i++)
|
||||
{
|
||||
NVMISC_MEMSET(&newTiming, 0, sizeof(newTiming));
|
||||
|
||||
|
||||
if (parseDisplayIdTiming5Descriptor(blk->descriptors + i, &newTiming) == NVT_STATUS_SUCCESS)
|
||||
{
|
||||
if (pEdidInfo == NULL) continue;
|
||||
@ -1030,7 +1002,7 @@ static NVT_STATUS parseDisplayIdRangeLimits(NvU8 * block, NVT_DISPLAYID_INFO *pI
|
||||
|
||||
rl = pInfo->range_limits + pInfo->rl_num;
|
||||
(pInfo->rl_num)++;
|
||||
|
||||
|
||||
rl->pclk_min = minPclk;
|
||||
rl->pclk_max = maxPclk;
|
||||
|
||||
@ -1105,7 +1077,7 @@ static NVT_STATUS parseDisplayIdDeviceData(NvU8 * block, NVT_DISPLAYID_INFO *pIn
|
||||
pInfo->device_op_mode = DRF_VAL(T_DISPLAYID, _DEVICE, _OPERATING_MODE, blk->operating_mode);
|
||||
pInfo->support_backlight = DRF_VAL(T_DISPLAYID, _DEVICE, _BACKLIGHT, blk->operating_mode);
|
||||
pInfo->support_intensity = DRF_VAL(T_DISPLAYID, _DEVICE, _INTENSITY, blk->operating_mode);
|
||||
|
||||
|
||||
pInfo->horiz_pixel_count = blk->horizontal_pixel_count;
|
||||
pInfo->vert_pixel_count = blk->vertical_pixel_count;
|
||||
|
||||
@ -1278,7 +1250,7 @@ static NVT_STATUS parseDisplayIdStereo(NvU8 * block, NVT_DISPLAYID_INFO *pInfo)
|
||||
nvt_assert(0);
|
||||
return NVT_STATUS_ERR;
|
||||
}
|
||||
|
||||
|
||||
return NVT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1322,7 +1294,7 @@ static NVT_STATUS parseDisplayIdTiledDisplay(NvU8 * block, NVT_DISPLAYID_INFO *p
|
||||
pInfo->bezel_info.left = (blk->bezel_info.left * blk->bezel_info.pixel_density) / 10;
|
||||
|
||||
pInfo->tile_topology_id.vendor_id = (blk->topology_id.vendor_id[2] << 16) |
|
||||
(blk->topology_id.vendor_id[1] << 8 ) |
|
||||
(blk->topology_id.vendor_id[1] << 8 ) |
|
||||
blk->topology_id.vendor_id[0];
|
||||
|
||||
pInfo->tile_topology_id.product_id = (blk->topology_id.product_id[1] << 8) | blk->topology_id.product_id[0];
|
||||
@ -1350,7 +1322,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
|
||||
if (pInfo == NULL) return NVT_STATUS_SUCCESS;
|
||||
|
||||
p861info = &pInfo->ext861;
|
||||
|
||||
|
||||
pInfo->ext_displayid.cea_data_block_present = 1;
|
||||
p861info->revision = blk->revision;
|
||||
|
||||
@ -1366,7 +1338,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
|
||||
|
||||
//parse HDR related information from the HDR static metadata data block
|
||||
parseCea861HdrStaticMetadataDataBlock(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
|
||||
|
||||
|
||||
// base video
|
||||
parse861bShortTiming(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
|
||||
// yuv420-only video
|
||||
@ -1422,7 +1394,7 @@ static NVT_STATUS parseDisplayIdDisplayInterfaceFeatures(NvU8 * block, NVT_DISPL
|
||||
|
||||
// Minimum Pixel Rate at Which YCbCr 4:2:0 Encoding Is Supported
|
||||
pInfo->u4.display_interface_features.minimum_pixel_rate_ycbcr420 = blk->minimum_pixel_rate_ycbcr420;
|
||||
|
||||
|
||||
// Audio capability
|
||||
pInfo->u4.display_interface_features.audio_capability.support_32khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_32KHZ, blk->supported_audio_capability);
|
||||
pInfo->u4.display_interface_features.audio_capability.support_44_1khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_44_1KHZ, blk->supported_audio_capability);
|
||||
|
@ -235,7 +235,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
|
||||
|
||||
if (totalPixelsIn2Fields != 0)
|
||||
{
|
||||
rr = (NvU16)axb_div_c(pclk * 2, 10000, totalPixelsIn2Fields);
|
||||
rr = (NvU16)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000, (NvU64)totalPixelsIn2Fields);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -244,7 +244,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
|
||||
|
||||
if (totalPixels != 0)
|
||||
{
|
||||
rr = (NvU16)axb_div_c(pclk, 10000, totalPixels);
|
||||
rr = (NvU16)axb_div_c_64((NvU64)pclk, (NvU64)10000, (NvU64)totalPixels);
|
||||
}
|
||||
}
|
||||
return rr;
|
||||
@ -261,7 +261,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
|
||||
|
||||
if (totalPixelsIn2Fields != 0)
|
||||
{
|
||||
rrx1k = (NvU32)axb_div_c(pclk * 2, 10000000, totalPixelsIn2Fields);
|
||||
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000000, (NvU64)totalPixelsIn2Fields);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -270,7 +270,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
|
||||
|
||||
if (totalPixels != 0)
|
||||
{
|
||||
rrx1k = (NvU32)axb_div_c(pclk, 10000000, totalPixels);
|
||||
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk, (NvU64)10000000, (NvU64)totalPixels);
|
||||
}
|
||||
}
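The switch from axb_div_c() to axb_div_c_64() in the hunks above presumably guards the refresh-rate math against 32-bit overflow: with HTotal and VTotal as 16-bit fields, the interlaced pixel total HTotal*(2*VTotal+1) alone can exceed 2^32, and the scaled numerators certainly can. Assuming pclk is in 10 kHz units as elsewhere in these timing helpers, the quantities being computed are:

\[
\mathrm{rr} = \frac{\mathrm{pclk}\cdot 10^{4}}{H_{\mathrm{total}}\, V_{\mathrm{total}}}
\qquad
\mathrm{rr}_{\mathrm{interlaced}} = \frac{2\,\mathrm{pclk}\cdot 10^{4}}{H_{\mathrm{total}}\,(2 V_{\mathrm{total}}+1)}
\qquad
\mathrm{rrx1k} = \frac{\mathrm{pclk}\cdot 10^{7}}{H_{\mathrm{total}}\, V_{\mathrm{total}}}
\]

Worked check against the standard 1920x1080@60 CEA timing (pixel clock 148.5 MHz, so pclk = 14850, HTotal = 2200, VTotal = 1125): rr = 14850*10^4 / (2200*1125) = 148500000 / 2475000 = 60 Hz, and rrx1k = 60000; note the rrx1k numerator, 1.485e11, already exceeds the 32-bit range.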
|
||||
|
||||
|
@ -70,8 +70,8 @@ extern "C" {
|
||||
|
||||
// Link Transition Timeouts in milliseconds
|
||||
#define NVLINK_TRANSITION_OFF_TIMEOUT 1
|
||||
#define NVLINK_TRANSITION_SAFE_TIMEOUT 300
|
||||
#define NVLINK_TRANSITION_HS_TIMEOUT 8000
|
||||
#define NVLINK_TRANSITION_SAFE_TIMEOUT 70
|
||||
#define NVLINK_TRANSITION_HS_TIMEOUT 7000
|
||||
#define NVLINK_TRANSITION_ACTIVE_PENDING 2000
|
||||
#define NVLINK_TRANSITION_POST_HS_TIMEOUT 70
|
||||
|
||||
|
@ -222,8 +222,7 @@ _cci_module_cable_detect
|
||||
}
|
||||
default:
|
||||
{
|
||||
NVSWITCH_ASSERT(0);
|
||||
break;
|
||||
return -NVL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
}
|
||||
|
||||
@ -348,8 +347,9 @@ _cci_module_identify
|
||||
// Mark as faulty
|
||||
device->pCci->isFaulty[moduleId] = NV_TRUE;
|
||||
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: Module HW check failed. Module %d\n", __FUNCTION__, moduleId);
|
||||
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
|
||||
"Module %d faulty\n", moduleId);
|
||||
|
||||
return -NVL_ERR_GENERIC;
|
||||
}
|
||||
|
||||
@ -612,6 +612,9 @@ _cci_module_identify_async
|
||||
NvlStatus retval;
|
||||
PCCI pCci = device->pCci;
|
||||
CCI_MODULE_ONBOARD_STATE nextState;
|
||||
CCI_MODULE_STATE *pOnboardState;
|
||||
|
||||
pOnboardState = &device->pCci->moduleState[moduleId];
|
||||
|
||||
nvswitch_os_memset(&nextState, 0, sizeof(CCI_MODULE_ONBOARD_STATE));
|
||||
|
||||
@ -637,8 +640,9 @@ _cci_module_identify_async
|
||||
}
|
||||
default:
|
||||
{
|
||||
// Not expected
|
||||
NVSWITCH_ASSERT(0);
|
||||
// Invalid cable type
|
||||
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
|
||||
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
|
||||
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
|
||||
break;
|
||||
}
|
||||
@ -646,6 +650,8 @@ _cci_module_identify_async
|
||||
}
|
||||
else
|
||||
{
|
||||
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
|
||||
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
|
||||
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
|
||||
}
|
||||
|
||||
|
@ -7727,11 +7727,11 @@ nvswitch_ctrl_get_err_info_lr10
|
||||
}
|
||||
|
||||
// TODO NVidia TL not supported
|
||||
NVSWITCH_PRINT(device, WARN,
|
||||
NVSWITCH_PRINT(device, NOISY,
|
||||
"%s WARNING: Nvidia %s register %s does not exist!\n",
|
||||
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_ERRLOG_REG");
|
||||
|
||||
NVSWITCH_PRINT(device, WARN,
|
||||
NVSWITCH_PRINT(device, NOISY,
|
||||
"%s WARNING: Nvidia %s register %s does not exist!\n",
|
||||
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_INTEN_REG");
|
||||
|
||||
|
@ -1638,6 +1638,9 @@ nvswitch_cci_module_access_cmd_ls10
|
||||
// Mark as faulty
|
||||
device->pCci->isFaulty[osfp] = NV_TRUE;
|
||||
|
||||
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
|
||||
"Module %d access error\n", osfp);
|
||||
|
||||
return -NVL_IO_ERROR;
|
||||
}
|
||||
|
||||
|
@ -5549,6 +5549,29 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_nvswitch_dump_minion_ali_debug_registers_ls10
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 link
|
||||
)
|
||||
{
|
||||
NVSWITCH_MINION_ALI_DEBUG_REGISTERS params;
|
||||
nvlink_link *nvlink = nvswitch_get_link(device, link);
|
||||
|
||||
if ((nvlink != NULL) &&
|
||||
(nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, ¶ms) == NVL_SUCCESS))
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: Minion error on link #%d!:\n"
|
||||
"Minion DLSTAT MN00 = 0x%x\n"
|
||||
"Minion DLSTAT UC01 = 0x%x\n"
|
||||
"Minion DLSTAT UC01 = 0x%x\n",
|
||||
__FUNCTION__, link,
|
||||
params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_nvswitch_emit_link_errors_minion_fatal_ls10
|
||||
(
|
||||
@ -5611,6 +5634,8 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
|
||||
enabledLinks &= ~bit;
|
||||
regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
|
||||
NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData);
|
||||
|
||||
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -5647,8 +5672,8 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
|
||||
switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
|
||||
{
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
|
||||
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
|
||||
break;
|
||||
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
|
||||
break;
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
|
||||
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
|
||||
break;
|
||||
@ -5660,6 +5685,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
|
||||
break;
|
||||
}
|
||||
|
||||
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -42,6 +42,11 @@
|
||||
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
|
||||
#include "ls10/minion_nvlink_defines_public_ls10.h"
|
||||
|
||||
#define NV_NVLINK_TLREQ_TIMEOUT_ACTIVE 10000
|
||||
#define NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN 10
|
||||
#define NV_NVLINK_TLREQ_TIMEOUT_RESET 4
|
||||
#define NV_NVLINK_TLREQ_TIMEOUT_L2 5
|
||||
|
||||
static void
|
||||
_nvswitch_configure_reserved_throughput_counters
|
||||
(
|
||||
@ -143,9 +148,9 @@ nvswitch_init_lpwr_regs_ls10
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: Failed to set L1 Threshold\n",
|
||||
__FUNCTION__);
|
||||
__FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvswitch_corelib_training_complete_ls10
|
||||
@ -1433,7 +1438,7 @@ nvswitch_load_link_disable_settings_ls10
|
||||
nvswitch_device *device,
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
{
|
||||
NvU32 regVal;
|
||||
|
||||
// Read state from NVLIPT HW
|
||||
@ -1443,7 +1448,7 @@ nvswitch_load_link_disable_settings_ls10
|
||||
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
|
||||
{
|
||||
NVSWITCH_ASSERT(!cciIsLinkManaged(device, link->linkNumber));
|
||||
|
||||
|
||||
// Set link to invalid and unregister from corelib
|
||||
device->link[link->linkNumber].valid = NV_FALSE;
|
||||
nvlink_lib_unregister_link(link);
|
||||
@ -1589,7 +1594,7 @@ nvswitch_reset_and_train_link_ls10
|
||||
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
|
||||
|
||||
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
{
|
||||
|
||||
status = nvswitch_request_tl_link_state_ls10(link,
|
||||
@ -1683,6 +1688,39 @@ nvswitch_are_link_clocks_on_ls10
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
static
|
||||
NvlStatus
|
||||
_nvswitch_tl_request_get_timeout_value_ls10
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 tlLinkState,
|
||||
NvU32 *timeoutVal
|
||||
)
|
||||
{
|
||||
switch (tlLinkState)
|
||||
{
|
||||
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_ACTIVE:
|
||||
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_ACTIVE;
|
||||
break;
|
||||
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET:
|
||||
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_RESET;
|
||||
break;
|
||||
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN:
|
||||
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN;
|
||||
break;
|
||||
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_L2:
|
||||
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_L2;
|
||||
break;
|
||||
default:
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: Invalid tlLinkState %d provided!\n",
|
||||
__FUNCTION__, tlLinkState);
|
||||
return NVL_BAD_ARGS;
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
NvlStatus
|
||||
nvswitch_request_tl_link_state_ls10
|
||||
(
|
||||
@ -1696,6 +1734,9 @@ nvswitch_request_tl_link_state_ls10
|
||||
NvU32 linkStatus;
|
||||
NvU32 lnkErrStatus;
|
||||
NvU32 bit;
|
||||
NvU32 timeoutVal;
|
||||
NVSWITCH_TIMEOUT timeout;
|
||||
NvBool keepPolling;
|
||||
|
||||
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
|
||||
{
|
||||
@ -1729,17 +1770,43 @@ nvswitch_request_tl_link_state_ls10
|
||||
|
||||
if (bSync)
|
||||
{
|
||||
// Wait for the TL link state register to complete
|
||||
status = nvswitch_wait_for_tl_request_ready_lr10(link);
|
||||
|
||||
// setup timeouts for the TL request
|
||||
status = _nvswitch_tl_request_get_timeout_value_ls10(device, tlLinkState, &timeoutVal);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
return status;
|
||||
return NVL_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
nvswitch_timeout_create(NVSWITCH_INTERVAL_1MSEC_IN_NS * timeoutVal, &timeout);
|
||||
status = NVL_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
do
|
||||
{
|
||||
keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
|
||||
|
||||
// Check for state requested
|
||||
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
|
||||
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
|
||||
|
||||
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) ==
|
||||
tlLinkState)
|
||||
{
|
||||
status = NVL_SUCCESS;
|
||||
break;
|
||||
}
|
||||
|
||||
nvswitch_os_sleep(1);
|
||||
}
|
||||
while(keepPolling);
|
||||
|
||||
// Do one final check if the polling loop didn't see the target linkState
|
||||
if (status == NVL_MORE_PROCESSING_REQUIRED)
|
||||
{
|
||||
// Check for state requested
|
||||
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
|
||||
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
|
||||
|
||||
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
|
||||
tlLinkState)
|
||||
{
|
||||
@ -1750,6 +1817,8 @@ nvswitch_request_tl_link_state_ls10
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -36,7 +36,36 @@ extern "C" {
|
||||
#define RUSD_TIMESTAMP_WRITE_IN_PROGRESS (NV_U64_MAX)
|
||||
#define RUSD_TIMESTAMP_INVALID 0

#define RUSD_SEQ_DATA_VALID(x) ((((NvU32)(x)) & 0x1U) == 0)

// seq = c_0 * b_0 + c_1 * (b_0 - 1) where c_0 == open_count and c_1 == close_count
// When they are equal, data is valid, otherwise data is being written.
// b_0 == 1 mod (b_0 - 1) and b_0 - 1 == (-1) mod b_0
// So, c_0 == seq mod (b_0 - 1) and c_1 == (-1 * seq) mod b_0
// c_1 cannot be calculated quite so naively because negative modulos aren't fun, so we
// instead do c_1 == (b_0 - (seq mod b_0)) mod b_0
//
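To make the encoding concrete, here is a short worked illustration in the same notation (not part of the original comment), taking b_0 = RUSD_SEQ_BASE0 = 2^20 from the defines below:

    3 opens, 3 closes:  seq = 3*b_0 + 3*(b_0 - 1) = 6*b_0 - 3  ->  c_0 = 3, c_1 = 3  (equal: data valid)
    4 opens, 3 closes:  seq = 4*b_0 + 3*(b_0 - 1) = 7*b_0 - 3  ->  c_0 = 4, c_1 = 3  (unequal: reader retries)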
#define RUSD_SEQ_BASE_SHIFT 20llu
#define RUSD_SEQ_BASE0 (1llu << RUSD_SEQ_BASE_SHIFT)
#define RUSD_SEQ_BASE1 (RUSD_SEQ_BASE0 - 1llu)
#define RUSD_SEQ_COEFF1(x) ((RUSD_SEQ_BASE0 - ((x) % RUSD_SEQ_BASE0)) % RUSD_SEQ_BASE0)
#define RUSD_SEQ_COEFF0(x) ((x) % RUSD_SEQ_BASE1)
#define RUSD_SEQ_WRAP_SHIFT 18llu
#define RUSD_SEQ_WRAP_VAL (1llu << RUSD_SEQ_WRAP_SHIFT)
#define RUSD_SEQ_DATA_VALID(x) (RUSD_SEQ_COEFF0(x) == RUSD_SEQ_COEFF1(x))

//
// Helper macros to check seq before reading RUSD.
// No dowhile wrap as it is using continue/break
//
#define RUSD_SEQ_CHECK1(SHARED_DATA) \
    NvU64 seq = (SHARED_DATA)->seq; \
    portAtomicMemoryFenceLoad(); \
    if (!RUSD_SEQ_DATA_VALID(seq)) \
        continue;

#define RUSD_SEQ_CHECK2(SHARED_DATA) \
    portAtomicMemoryFenceLoad(); \
    if (seq == (SHARED_DATA)->seq) \
        break;

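The two helper macros are meant to bracket a read of the shared page inside a retry loop, which is why they end in bare continue/break. The sketch below shows one way such a reader loop might look; it is an assumption about the intended usage rather than code from this change, and the function name and the choice of bar1AvailSize as the field being read are illustrative only.

```c
// Sketch only: take a consistent snapshot of one field from the shared page.
static NvU32 rusdReadBar1AvailSketch(volatile NV00DE_SHARED_DATA *pShared)
{
    NvU32 bar1Avail = 0;

    while (NV_TRUE)
    {
        RUSD_SEQ_CHECK1(pShared);            // snapshot seq; retry while a write is in flight

        bar1Avail = pShared->bar1AvailSize;  // read the field(s) of interest

        RUSD_SEQ_CHECK2(pShared);            // keep the read only if seq did not change
    }

    return bar1Avail;
}
```

The load fences mirror the usual seqlock pattern: the first check rejects a snapshot taken while a write is in flight, and the second rejects a read torn by a write that started after the first check.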
enum {
|
||||
RUSD_CLK_PUBLIC_DOMAIN_GRAPHICS = 0,
|
||||
@ -166,10 +195,12 @@ typedef struct RUSD_INST_POWER_USAGE {
|
||||
} RUSD_INST_POWER_USAGE;
|
||||
|
||||
typedef struct NV00DE_SHARED_DATA {
|
||||
volatile NvU32 seq;
|
||||
volatile NvU64 seq;
|
||||
|
||||
NvU32 bar1Size;
|
||||
NvU32 bar1AvailSize;
|
||||
NvU64 totalPmaMemory;
|
||||
NvU64 freePmaMemory;
|
||||
|
||||
// GSP polling data section
|
||||
NV_DECLARE_ALIGNED(RUSD_CLK_PUBLIC_DOMAIN_INFOS clkPublicDomainInfos, 8);
|
||||
|
@ -853,7 +853,8 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
|
||||
} NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS;
|
||||
|
||||
/* vGPU capabilities */
|
||||
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
|
||||
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
|
||||
#define NVA081_CTRL_VGPU_CAPABILITY_COMPUTE_MEDIA_ENGINE_GPU 1
|
||||
|
||||
/*
|
||||
* NVA081_CTRL_CMD_VGPU_SET_CAPABILITY
|
||||
@ -872,7 +873,7 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
|
||||
* NV_ERR_OBJECT_NOT_FOUND
|
||||
* NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
|
||||
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
|
||||
|
||||
#define NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID (0x1eU)
|
||||
|
||||
@ -881,4 +882,30 @@ typedef struct NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS {
|
||||
NvBool state;
|
||||
} NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS;
|
||||
|
||||
/*
|
||||
* NVA081_CTRL_CMD_VGPU_GET_CAPABILITY
|
||||
*
|
||||
* This command is to get state of vGPU capability for the physical GPU.
|
||||
*
|
||||
* capability [IN]
|
||||
*     This param specifies the requested capability of the device that is to be queried
|
||||
* One of NVA081_CTRL_VGPU_CAPABILITY* values
|
||||
*
|
||||
* state [OUT]
|
||||
* This param specifies the state of the capability
|
||||
*
|
||||
* Possible status values returned are:
|
||||
* NV_OK
|
||||
* NV_ERR_OBJECT_NOT_FOUND
|
||||
* NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
#define NVA081_CTRL_CMD_VGPU_GET_CAPABILITY (0xa081011f) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID" */
|
||||
|
||||
#define NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID (0x1fU)
|
||||
|
||||
typedef struct NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS {
|
||||
NvU32 capability;
|
||||
NvBool state;
|
||||
} NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS;
|
||||
|
||||
/* _ctrlA081vgpuconfig_h_ */
|
||||
|
@ -44,151 +44,6 @@ ENTRY(0x13BD, 0x11D6, 0x10de, "GRID M10-8A"),
|
||||
ENTRY(0x13BD, 0x1286, 0x10de, "GRID M10-2B"),
|
||||
ENTRY(0x13BD, 0x12EE, 0x10de, "GRID M10-2B4"),
|
||||
ENTRY(0x13BD, 0x1339, 0x10de, "GRID M10-1B4"),
|
||||
ENTRY(0x13F2, 0x114C, 0x10de, "GRID M60-0Q"),
|
||||
ENTRY(0x13F2, 0x114D, 0x10de, "GRID M60-1Q"),
|
||||
ENTRY(0x13F2, 0x114E, 0x10de, "GRID M60-2Q"),
|
||||
ENTRY(0x13F2, 0x114F, 0x10de, "GRID M60-4Q"),
|
||||
ENTRY(0x13F2, 0x1150, 0x10de, "GRID M60-8Q"),
|
||||
ENTRY(0x13F2, 0x1176, 0x10de, "GRID M60-0B"),
|
||||
ENTRY(0x13F2, 0x1177, 0x10de, "GRID M60-1B"),
|
||||
ENTRY(0x13F2, 0x117D, 0x10de, "GRID M60-2B"),
|
||||
ENTRY(0x13F2, 0x11AE, 0x10de, "GRID M60-1A"),
|
||||
ENTRY(0x13F2, 0x11AF, 0x10de, "GRID M60-2A"),
|
||||
ENTRY(0x13F2, 0x11B0, 0x10de, "GRID M60-4A"),
|
||||
ENTRY(0x13F2, 0x11B1, 0x10de, "GRID M60-8A"),
|
||||
ENTRY(0x13F2, 0x12EC, 0x10de, "GRID M60-2B4"),
|
||||
ENTRY(0x13F2, 0x1337, 0x10de, "GRID M60-1B4"),
|
||||
ENTRY(0x13F3, 0x117C, 0x10de, "GRID M6-2B"),
|
||||
ENTRY(0x13F3, 0x117E, 0x10de, "GRID M6-0B"),
|
||||
ENTRY(0x13F3, 0x117F, 0x10de, "GRID M6-1B"),
|
||||
ENTRY(0x13F3, 0x1180, 0x10de, "GRID M6-0Q"),
|
||||
ENTRY(0x13F3, 0x1181, 0x10de, "GRID M6-1Q"),
|
||||
ENTRY(0x13F3, 0x1182, 0x10de, "GRID M6-2Q"),
|
||||
ENTRY(0x13F3, 0x1183, 0x10de, "GRID M6-4Q"),
|
||||
ENTRY(0x13F3, 0x1184, 0x10de, "GRID M6-8Q"),
|
||||
ENTRY(0x13F3, 0x11AA, 0x10de, "GRID M6-1A"),
|
||||
ENTRY(0x13F3, 0x11AB, 0x10de, "GRID M6-2A"),
|
||||
ENTRY(0x13F3, 0x11AC, 0x10de, "GRID M6-4A"),
|
||||
ENTRY(0x13F3, 0x11AD, 0x10de, "GRID M6-8A"),
|
||||
ENTRY(0x13F3, 0x12ED, 0x10de, "GRID M6-2B4"),
|
||||
ENTRY(0x13F3, 0x1338, 0x10de, "GRID M6-1B4"),
|
||||
ENTRY(0x15F7, 0x1265, 0x10de, "GRID P100C-1B"),
|
||||
ENTRY(0x15F7, 0x1266, 0x10de, "GRID P100C-1Q"),
|
||||
ENTRY(0x15F7, 0x1267, 0x10de, "GRID P100C-2Q"),
|
||||
ENTRY(0x15F7, 0x1268, 0x10de, "GRID P100C-4Q"),
|
||||
ENTRY(0x15F7, 0x1269, 0x10de, "GRID P100C-6Q"),
|
||||
ENTRY(0x15F7, 0x126A, 0x10de, "GRID P100C-12Q"),
|
||||
ENTRY(0x15F7, 0x126B, 0x10de, "GRID P100C-1A"),
|
||||
ENTRY(0x15F7, 0x126C, 0x10de, "GRID P100C-2A"),
|
||||
ENTRY(0x15F7, 0x126D, 0x10de, "GRID P100C-4A"),
|
||||
ENTRY(0x15F7, 0x126E, 0x10de, "GRID P100C-6A"),
|
||||
ENTRY(0x15F7, 0x126F, 0x10de, "GRID P100C-12A"),
|
||||
ENTRY(0x15F7, 0x128D, 0x10de, "GRID P100C-2B"),
|
||||
ENTRY(0x15F7, 0x12F4, 0x10de, "GRID P100C-2B4"),
|
||||
ENTRY(0x15F7, 0x133F, 0x10de, "GRID P100C-1B4"),
|
||||
ENTRY(0x15F7, 0x137D, 0x10de, "GRID P100C-12C"),
|
||||
ENTRY(0x15F7, 0x138C, 0x10de, "GRID P100C-4C"),
|
||||
ENTRY(0x15F7, 0x138D, 0x10de, "GRID P100C-6C"),
|
||||
ENTRY(0x15F8, 0x1221, 0x10de, "GRID P100-1B"),
|
||||
ENTRY(0x15F8, 0x1222, 0x10de, "GRID P100-1Q"),
|
||||
ENTRY(0x15F8, 0x1223, 0x10de, "GRID P100-2Q"),
|
||||
ENTRY(0x15F8, 0x1224, 0x10de, "GRID P100-4Q"),
|
||||
ENTRY(0x15F8, 0x1225, 0x10de, "GRID P100-8Q"),
|
||||
ENTRY(0x15F8, 0x1226, 0x10de, "GRID P100-16Q"),
|
||||
ENTRY(0x15F8, 0x1227, 0x10de, "GRID P100-1A"),
|
||||
ENTRY(0x15F8, 0x1228, 0x10de, "GRID P100-2A"),
|
||||
ENTRY(0x15F8, 0x1229, 0x10de, "GRID P100-4A"),
|
||||
ENTRY(0x15F8, 0x122A, 0x10de, "GRID P100-8A"),
|
||||
ENTRY(0x15F8, 0x122B, 0x10de, "GRID P100-16A"),
|
||||
ENTRY(0x15F8, 0x128C, 0x10de, "GRID P100-2B"),
|
||||
ENTRY(0x15F8, 0x12F2, 0x10de, "GRID P100-2B4"),
|
||||
ENTRY(0x15F8, 0x133D, 0x10de, "GRID P100-1B4"),
|
||||
ENTRY(0x15F8, 0x137C, 0x10de, "GRID P100-16C"),
|
||||
ENTRY(0x15F8, 0x138A, 0x10de, "GRID P100-4C"),
|
||||
ENTRY(0x15F8, 0x138B, 0x10de, "GRID P100-8C"),
|
||||
ENTRY(0x15F9, 0x122C, 0x10de, "GRID P100X-1B"),
|
||||
ENTRY(0x15F9, 0x122D, 0x10de, "GRID P100X-1Q"),
|
||||
ENTRY(0x15F9, 0x122E, 0x10de, "GRID P100X-2Q"),
|
||||
ENTRY(0x15F9, 0x122F, 0x10de, "GRID P100X-4Q"),
|
||||
ENTRY(0x15F9, 0x1230, 0x10de, "GRID P100X-8Q"),
|
||||
ENTRY(0x15F9, 0x1231, 0x10de, "GRID P100X-16Q"),
|
||||
ENTRY(0x15F9, 0x1232, 0x10de, "GRID P100X-1A"),
|
||||
ENTRY(0x15F9, 0x1233, 0x10de, "GRID P100X-2A"),
|
||||
ENTRY(0x15F9, 0x1234, 0x10de, "GRID P100X-4A"),
|
||||
ENTRY(0x15F9, 0x1235, 0x10de, "GRID P100X-8A"),
|
||||
ENTRY(0x15F9, 0x1236, 0x10de, "GRID P100X-16A"),
|
||||
ENTRY(0x15F9, 0x128B, 0x10de, "GRID P100X-2B"),
|
||||
ENTRY(0x15F9, 0x12F3, 0x10de, "GRID P100X-2B4"),
|
||||
ENTRY(0x15F9, 0x133E, 0x10de, "GRID P100X-1B4"),
|
||||
ENTRY(0x15F9, 0x137B, 0x10de, "GRID P100X-16C"),
|
||||
ENTRY(0x15F9, 0x1388, 0x10de, "GRID P100X-4C"),
|
||||
ENTRY(0x15F9, 0x1389, 0x10de, "GRID P100X-8C"),
|
||||
ENTRY(0x1B38, 0x11E7, 0x10de, "GRID P40-1B"),
|
||||
ENTRY(0x1B38, 0x11E8, 0x10de, "GRID P40-1Q"),
|
||||
ENTRY(0x1B38, 0x11E9, 0x10de, "GRID P40-2Q"),
|
||||
ENTRY(0x1B38, 0x11EA, 0x10de, "GRID P40-3Q"),
|
||||
ENTRY(0x1B38, 0x11EB, 0x10de, "GRID P40-4Q"),
|
||||
ENTRY(0x1B38, 0x11EC, 0x10de, "GRID P40-6Q"),
|
||||
ENTRY(0x1B38, 0x11ED, 0x10de, "GRID P40-8Q"),
|
||||
ENTRY(0x1B38, 0x11EE, 0x10de, "GRID P40-12Q"),
|
||||
ENTRY(0x1B38, 0x11EF, 0x10de, "GRID P40-24Q"),
|
||||
ENTRY(0x1B38, 0x11F0, 0x10de, "GRID P40-1A"),
|
||||
ENTRY(0x1B38, 0x11F1, 0x10de, "GRID P40-2A"),
|
||||
ENTRY(0x1B38, 0x11F2, 0x10de, "GRID P40-3A"),
|
||||
ENTRY(0x1B38, 0x11F3, 0x10de, "GRID P40-4A"),
|
||||
ENTRY(0x1B38, 0x11F4, 0x10de, "GRID P40-6A"),
|
||||
ENTRY(0x1B38, 0x11F5, 0x10de, "GRID P40-8A"),
|
||||
ENTRY(0x1B38, 0x11F6, 0x10de, "GRID P40-12A"),
|
||||
ENTRY(0x1B38, 0x11F7, 0x10de, "GRID P40-24A"),
|
||||
ENTRY(0x1B38, 0x1287, 0x10de, "GRID P40-2B"),
|
||||
ENTRY(0x1B38, 0x12B1, 0x10de, "GeForce GTX P40-24"),
|
||||
ENTRY(0x1B38, 0x12B2, 0x10de, "GeForce GTX P40-12"),
|
||||
ENTRY(0x1B38, 0x12B3, 0x10de, "GeForce GTX P40-6"),
|
||||
ENTRY(0x1B38, 0x12EF, 0x10de, "GRID P40-2B4"),
|
||||
ENTRY(0x1B38, 0x133A, 0x10de, "GRID P40-1B4"),
|
||||
ENTRY(0x1B38, 0x137E, 0x10de, "GRID P40-24C"),
|
||||
ENTRY(0x1B38, 0x1381, 0x10de, "GRID P40-4C"),
|
||||
ENTRY(0x1B38, 0x1382, 0x10de, "GRID P40-6C"),
|
||||
ENTRY(0x1B38, 0x1383, 0x10de, "GRID P40-8C"),
|
||||
ENTRY(0x1B38, 0x1384, 0x10de, "GRID P40-12C"),
|
||||
ENTRY(0x1B38, 0x13B0, 0x10de, "GRID GTX P40-6"),
|
||||
ENTRY(0x1B38, 0x13B1, 0x10de, "GRID GTX P40-12"),
|
||||
ENTRY(0x1B38, 0x13B2, 0x10de, "GRID GTX P40-24"),
|
||||
ENTRY(0x1B38, 0x13D0, 0x10de, "GRID GTX P40-8"),
|
||||
ENTRY(0x1BB3, 0x1203, 0x10de, "GRID P4-1B"),
|
||||
ENTRY(0x1BB3, 0x1204, 0x10de, "GRID P4-1Q"),
|
||||
ENTRY(0x1BB3, 0x1205, 0x10de, "GRID P4-2Q"),
|
||||
ENTRY(0x1BB3, 0x1206, 0x10de, "GRID P4-4Q"),
|
||||
ENTRY(0x1BB3, 0x1207, 0x10de, "GRID P4-8Q"),
|
||||
ENTRY(0x1BB3, 0x1208, 0x10de, "GRID P4-1A"),
|
||||
ENTRY(0x1BB3, 0x1209, 0x10de, "GRID P4-2A"),
|
||||
ENTRY(0x1BB3, 0x120A, 0x10de, "GRID P4-4A"),
|
||||
ENTRY(0x1BB3, 0x120B, 0x10de, "GRID P4-8A"),
|
||||
ENTRY(0x1BB3, 0x1288, 0x10de, "GRID P4-2B"),
|
||||
ENTRY(0x1BB3, 0x12F1, 0x10de, "GRID P4-2B4"),
|
||||
ENTRY(0x1BB3, 0x133C, 0x10de, "GRID P4-1B4"),
|
||||
ENTRY(0x1BB3, 0x136D, 0x10de, "GRID GTX P4-2"),
|
||||
ENTRY(0x1BB3, 0x136E, 0x10de, "GRID GTX P4-4"),
|
||||
ENTRY(0x1BB3, 0x136F, 0x10de, "GRID GTX P4-8"),
|
||||
ENTRY(0x1BB3, 0x1380, 0x10de, "GRID P4-8C"),
|
||||
ENTRY(0x1BB3, 0x1385, 0x10de, "GRID P4-4C"),
|
||||
ENTRY(0x1BB4, 0x11F8, 0x10de, "GRID P6-1B"),
|
||||
ENTRY(0x1BB4, 0x11F9, 0x10de, "GRID P6-1Q"),
|
||||
ENTRY(0x1BB4, 0x11FA, 0x10de, "GRID P6-2Q"),
|
||||
ENTRY(0x1BB4, 0x11FB, 0x10de, "GRID P6-4Q"),
|
||||
ENTRY(0x1BB4, 0x11FC, 0x10de, "GRID P6-8Q"),
|
||||
ENTRY(0x1BB4, 0x11FD, 0x10de, "GRID P6-16Q"),
|
||||
ENTRY(0x1BB4, 0x11FE, 0x10de, "GRID P6-1A"),
|
||||
ENTRY(0x1BB4, 0x11FF, 0x10de, "GRID P6-2A"),
|
||||
ENTRY(0x1BB4, 0x1200, 0x10de, "GRID P6-4A"),
|
||||
ENTRY(0x1BB4, 0x1201, 0x10de, "GRID P6-8A"),
|
||||
ENTRY(0x1BB4, 0x1202, 0x10de, "GRID P6-16A"),
|
||||
ENTRY(0x1BB4, 0x1289, 0x10de, "GRID P6-2B"),
|
||||
ENTRY(0x1BB4, 0x12F0, 0x10de, "GRID P6-2B4"),
|
||||
ENTRY(0x1BB4, 0x133B, 0x10de, "GRID P6-1B4"),
|
||||
ENTRY(0x1BB4, 0x137F, 0x10de, "GRID P6-16C"),
|
||||
ENTRY(0x1BB4, 0x1386, 0x10de, "GRID P6-4C"),
|
||||
ENTRY(0x1BB4, 0x1387, 0x10de, "GRID P6-8C"),
|
||||
ENTRY(0x1DB1, 0x1259, 0x10de, "GRID V100X-1B"),
|
||||
ENTRY(0x1DB1, 0x125A, 0x10de, "GRID V100X-1Q"),
|
||||
ENTRY(0x1DB1, 0x125B, 0x10de, "GRID V100X-2Q"),
|
||||
@ -813,6 +668,20 @@ ENTRY(0x2324, 0x18E0, 0x10de, "NVIDIA H800XM-16C"),
|
||||
ENTRY(0x2324, 0x18E1, 0x10de, "NVIDIA H800XM-20C"),
|
||||
ENTRY(0x2324, 0x18E2, 0x10de, "NVIDIA H800XM-40C"),
|
||||
ENTRY(0x2324, 0x18E3, 0x10de, "NVIDIA H800XM-80C"),
|
||||
ENTRY(0x2329, 0x2028, 0x10de, "NVIDIA H20-1-12CME"),
|
||||
ENTRY(0x2329, 0x2029, 0x10de, "NVIDIA H20-1-12C"),
|
||||
ENTRY(0x2329, 0x202A, 0x10de, "NVIDIA H20-1-24C"),
|
||||
ENTRY(0x2329, 0x202B, 0x10de, "NVIDIA H20-2-24C"),
|
||||
ENTRY(0x2329, 0x202C, 0x10de, "NVIDIA H20-3-48C"),
|
||||
ENTRY(0x2329, 0x202D, 0x10de, "NVIDIA H20-4-48C"),
|
||||
ENTRY(0x2329, 0x202E, 0x10de, "NVIDIA H20-7-96C"),
|
||||
ENTRY(0x2329, 0x202F, 0x10de, "NVIDIA H20-4C"),
|
||||
ENTRY(0x2329, 0x2030, 0x10de, "NVIDIA H20-6C"),
|
||||
ENTRY(0x2329, 0x2031, 0x10de, "NVIDIA H20-12C"),
|
||||
ENTRY(0x2329, 0x2032, 0x10de, "NVIDIA H20-16C"),
|
||||
ENTRY(0x2329, 0x2033, 0x10de, "NVIDIA H20-24C"),
|
||||
ENTRY(0x2329, 0x2034, 0x10de, "NVIDIA H20-48C"),
|
||||
ENTRY(0x2329, 0x2035, 0x10de, "NVIDIA H20-96C"),
|
||||
ENTRY(0x2330, 0x187A, 0x10de, "NVIDIA H100XM-1-10CME"),
|
||||
ENTRY(0x2330, 0x187B, 0x10de, "NVIDIA H100XM-1-10C"),
|
||||
ENTRY(0x2330, 0x187C, 0x10de, "NVIDIA H100XM-1-20C"),
|
||||
@ -883,14 +752,14 @@ ENTRY(0x233A, 0x186B, 0x10de, "NVIDIA H800L-15C"),
|
||||
ENTRY(0x233A, 0x186C, 0x10de, "NVIDIA H800L-23C"),
|
||||
ENTRY(0x233A, 0x186D, 0x10de, "NVIDIA H800L-47C"),
|
||||
ENTRY(0x233A, 0x186E, 0x10de, "NVIDIA H800L-94C"),
|
||||
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA H100GL-1-12CME"),
|
||||
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA H100GL-1-12C"),
|
||||
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA H100GL-1-24C"),
|
||||
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA H100GL-2-24C"),
|
||||
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA H100GL-3-48C"),
|
||||
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA H100GL-4-48C"),
|
||||
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA H100GL-7-96C"),
|
||||
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA H100GL-96C"),
|
||||
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA GH200-1-12CME"),
|
||||
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA GH200-1-12C"),
|
||||
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA GH200-1-24C"),
|
||||
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA GH200-2-24C"),
|
||||
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA GH200-3-48C"),
|
||||
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA GH200-4-48C"),
|
||||
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA GH200-7-96C"),
|
||||
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA GH200-96C"),
|
||||
ENTRY(0x25B6, 0x159D, 0x10de, "NVIDIA A16-1B"),
|
||||
ENTRY(0x25B6, 0x159E, 0x10de, "NVIDIA A16-2B"),
|
||||
ENTRY(0x25B6, 0x159F, 0x10de, "NVIDIA A16-1Q"),
|
||||
@ -987,6 +856,45 @@ ENTRY(0x26B2, 0x1835, 0x10de, "NVIDIA RTX5000-Ada-4C"),
|
||||
ENTRY(0x26B2, 0x1836, 0x10de, "NVIDIA RTX5000-Ada-8C"),
|
||||
ENTRY(0x26B2, 0x1837, 0x10de, "NVIDIA RTX5000-Ada-16C"),
|
||||
ENTRY(0x26B2, 0x1838, 0x10de, "NVIDIA RTX5000-Ada-32C"),
|
||||
ENTRY(0x26B3, 0x1958, 0x10de, "NVIDIA RTX 5880-Ada-1B"),
|
||||
ENTRY(0x26B3, 0x1959, 0x10de, "NVIDIA RTX 5880-Ada-2B"),
|
||||
ENTRY(0x26B3, 0x195A, 0x10de, "NVIDIA RTX 5880-Ada-1Q"),
|
||||
ENTRY(0x26B3, 0x195B, 0x10de, "NVIDIA RTX 5880-Ada-2Q"),
|
||||
ENTRY(0x26B3, 0x195C, 0x10de, "NVIDIA RTX 5880-Ada-3Q"),
|
||||
ENTRY(0x26B3, 0x195D, 0x10de, "NVIDIA RTX 5880-Ada-4Q"),
|
||||
ENTRY(0x26B3, 0x195E, 0x10de, "NVIDIA RTX 5880-Ada-6Q"),
|
||||
ENTRY(0x26B3, 0x195F, 0x10de, "NVIDIA RTX 5880-Ada-8Q"),
|
||||
ENTRY(0x26B3, 0x1960, 0x10de, "NVIDIA RTX 5880-Ada-12Q"),
|
||||
ENTRY(0x26B3, 0x1961, 0x10de, "NVIDIA RTX 5880-Ada-16Q"),
|
||||
ENTRY(0x26B3, 0x1962, 0x10de, "NVIDIA RTX 5880-Ada-24Q"),
|
||||
ENTRY(0x26B3, 0x1963, 0x10de, "NVIDIA RTX 5880-Ada-48Q"),
|
||||
ENTRY(0x26B3, 0x1964, 0x10de, "NVIDIA RTX 5880-Ada-1A"),
|
||||
ENTRY(0x26B3, 0x1965, 0x10de, "NVIDIA RTX 5880-Ada-2A"),
|
||||
ENTRY(0x26B3, 0x1966, 0x10de, "NVIDIA RTX 5880-Ada-3A"),
|
||||
ENTRY(0x26B3, 0x1967, 0x10de, "NVIDIA RTX 5880-Ada-4A"),
|
||||
ENTRY(0x26B3, 0x1968, 0x10de, "NVIDIA RTX 5880-Ada-6A"),
|
||||
ENTRY(0x26B3, 0x1969, 0x10de, "NVIDIA RTX 5880-Ada-8A"),
|
||||
ENTRY(0x26B3, 0x196A, 0x10de, "NVIDIA RTX 5880-Ada-12A"),
|
||||
ENTRY(0x26B3, 0x196B, 0x10de, "NVIDIA RTX 5880-Ada-16A"),
|
||||
ENTRY(0x26B3, 0x196C, 0x10de, "NVIDIA RTX 5880-Ada-24A"),
|
||||
ENTRY(0x26B3, 0x196D, 0x10de, "NVIDIA RTX 5880-Ada-48A"),
|
||||
ENTRY(0x26B3, 0x196E, 0x10de, "NVIDIA RTX 5880-Ada-1"),
|
||||
ENTRY(0x26B3, 0x196F, 0x10de, "NVIDIA RTX 5880-Ada-2"),
|
||||
ENTRY(0x26B3, 0x1970, 0x10de, "NVIDIA RTX 5880-Ada-3"),
|
||||
ENTRY(0x26B3, 0x1971, 0x10de, "NVIDIA RTX 5880-Ada-4"),
|
||||
ENTRY(0x26B3, 0x1972, 0x10de, "NVIDIA RTX 5880-Ada-6"),
|
||||
ENTRY(0x26B3, 0x1973, 0x10de, "NVIDIA RTX 5880-Ada-8"),
|
||||
ENTRY(0x26B3, 0x1974, 0x10de, "NVIDIA RTX 5880-Ada-12"),
|
||||
ENTRY(0x26B3, 0x1975, 0x10de, "NVIDIA RTX 5880-Ada-16"),
|
||||
ENTRY(0x26B3, 0x1976, 0x10de, "NVIDIA RTX 5880-Ada-24"),
|
||||
ENTRY(0x26B3, 0x1977, 0x10de, "NVIDIA RTX 5880-Ada-48"),
|
||||
ENTRY(0x26B3, 0x1978, 0x10de, "NVIDIA RTX 5880-Ada-4C"),
|
||||
ENTRY(0x26B3, 0x1979, 0x10de, "NVIDIA RTX 5880-Ada-6C"),
|
||||
ENTRY(0x26B3, 0x197A, 0x10de, "NVIDIA RTX 5880-Ada-8C"),
|
||||
ENTRY(0x26B3, 0x197B, 0x10de, "NVIDIA RTX 5880-Ada-12C"),
|
||||
ENTRY(0x26B3, 0x197C, 0x10de, "NVIDIA RTX 5880-Ada-16C"),
|
||||
ENTRY(0x26B3, 0x197D, 0x10de, "NVIDIA RTX 5880-Ada-24C"),
|
||||
ENTRY(0x26B3, 0x197E, 0x10de, "NVIDIA RTX 5880-Ada-48C"),
|
||||
ENTRY(0x26B5, 0x176D, 0x10de, "NVIDIA L40-1B"),
|
||||
ENTRY(0x26B5, 0x176E, 0x10de, "NVIDIA L40-2B"),
|
||||
ENTRY(0x26B5, 0x176F, 0x10de, "NVIDIA L40-1Q"),
|
||||
@ -1102,6 +1010,78 @@ ENTRY(0x26B9, 0x18AE, 0x10de, "NVIDIA L40S-12C"),
|
||||
ENTRY(0x26B9, 0x18AF, 0x10de, "NVIDIA L40S-16C"),
|
||||
ENTRY(0x26B9, 0x18B0, 0x10de, "NVIDIA L40S-24C"),
|
||||
ENTRY(0x26B9, 0x18B1, 0x10de, "NVIDIA L40S-48C"),
|
||||
ENTRY(0x26BA, 0x1909, 0x10de, "NVIDIA L20-1B"),
|
||||
ENTRY(0x26BA, 0x190A, 0x10de, "NVIDIA L20-2B"),
|
||||
ENTRY(0x26BA, 0x190B, 0x10de, "NVIDIA L20-1Q"),
|
||||
ENTRY(0x26BA, 0x190C, 0x10de, "NVIDIA L20-2Q"),
|
||||
ENTRY(0x26BA, 0x190D, 0x10de, "NVIDIA L20-3Q"),
|
||||
ENTRY(0x26BA, 0x190E, 0x10de, "NVIDIA L20-4Q"),
|
||||
ENTRY(0x26BA, 0x190F, 0x10de, "NVIDIA L20-6Q"),
|
||||
ENTRY(0x26BA, 0x1910, 0x10de, "NVIDIA L20-8Q"),
|
||||
ENTRY(0x26BA, 0x1911, 0x10de, "NVIDIA L20-12Q"),
|
||||
ENTRY(0x26BA, 0x1912, 0x10de, "NVIDIA L20-16Q"),
|
||||
ENTRY(0x26BA, 0x1913, 0x10de, "NVIDIA L20-24Q"),
|
||||
ENTRY(0x26BA, 0x1914, 0x10de, "NVIDIA L20-48Q"),
|
||||
ENTRY(0x26BA, 0x1915, 0x10de, "NVIDIA L20-1A"),
|
||||
ENTRY(0x26BA, 0x1916, 0x10de, "NVIDIA L20-2A"),
|
||||
ENTRY(0x26BA, 0x1917, 0x10de, "NVIDIA L20-3A"),
|
||||
ENTRY(0x26BA, 0x1918, 0x10de, "NVIDIA L20-4A"),
|
||||
ENTRY(0x26BA, 0x1919, 0x10de, "NVIDIA L20-6A"),
|
||||
ENTRY(0x26BA, 0x191A, 0x10de, "NVIDIA L20-8A"),
|
||||
ENTRY(0x26BA, 0x191B, 0x10de, "NVIDIA L20-12A"),
|
||||
ENTRY(0x26BA, 0x191C, 0x10de, "NVIDIA L20-16A"),
|
||||
ENTRY(0x26BA, 0x191D, 0x10de, "NVIDIA L20-24A"),
|
||||
ENTRY(0x26BA, 0x191E, 0x10de, "NVIDIA L20-48A"),
|
||||
ENTRY(0x26BA, 0x191F, 0x10de, "NVIDIA GeForce RTX 3050"),
|
||||
ENTRY(0x26BA, 0x1920, 0x10de, "NVIDIA GeForce RTX 3060"),
|
||||
ENTRY(0x26BA, 0x1921, 0x10de, "NVIDIA L20-1"),
|
||||
ENTRY(0x26BA, 0x1922, 0x10de, "NVIDIA L20-2"),
|
||||
ENTRY(0x26BA, 0x1923, 0x10de, "NVIDIA L20-3"),
|
||||
ENTRY(0x26BA, 0x1924, 0x10de, "NVIDIA L20-4"),
|
||||
ENTRY(0x26BA, 0x1925, 0x10de, "NVIDIA L20-6"),
|
||||
ENTRY(0x26BA, 0x1926, 0x10de, "NVIDIA L20-8"),
|
||||
ENTRY(0x26BA, 0x1927, 0x10de, "NVIDIA L20-12"),
|
||||
ENTRY(0x26BA, 0x1928, 0x10de, "NVIDIA L20-16"),
|
||||
ENTRY(0x26BA, 0x1929, 0x10de, "NVIDIA L20-24"),
|
||||
ENTRY(0x26BA, 0x192A, 0x10de, "NVIDIA L20-48"),
|
||||
ENTRY(0x26BA, 0x192B, 0x10de, "NVIDIA L20-4C"),
|
||||
ENTRY(0x26BA, 0x192C, 0x10de, "NVIDIA L20-6C"),
|
||||
ENTRY(0x26BA, 0x192D, 0x10de, "NVIDIA L20-8C"),
|
||||
ENTRY(0x26BA, 0x192E, 0x10de, "NVIDIA L20-12C"),
|
||||
ENTRY(0x26BA, 0x192F, 0x10de, "NVIDIA L20-16C"),
|
||||
ENTRY(0x26BA, 0x1930, 0x10de, "NVIDIA L20-24C"),
|
||||
ENTRY(0x26BA, 0x1931, 0x10de, "NVIDIA L20-48C"),
|
||||
ENTRY(0x27B6, 0x1938, 0x10de, "NVIDIA L2-1B"),
|
||||
ENTRY(0x27B6, 0x1939, 0x10de, "NVIDIA L2-2B"),
|
||||
ENTRY(0x27B6, 0x193A, 0x10de, "NVIDIA L2-1Q"),
|
||||
ENTRY(0x27B6, 0x193B, 0x10de, "NVIDIA L2-2Q"),
|
||||
ENTRY(0x27B6, 0x193C, 0x10de, "NVIDIA L2-3Q"),
|
||||
ENTRY(0x27B6, 0x193D, 0x10de, "NVIDIA L2-4Q"),
|
||||
ENTRY(0x27B6, 0x193E, 0x10de, "NVIDIA L2-6Q"),
|
||||
ENTRY(0x27B6, 0x193F, 0x10de, "NVIDIA L2-8Q"),
|
||||
ENTRY(0x27B6, 0x1940, 0x10de, "NVIDIA L2-12Q"),
|
||||
ENTRY(0x27B6, 0x1941, 0x10de, "NVIDIA L2-24Q"),
|
||||
ENTRY(0x27B6, 0x1942, 0x10de, "NVIDIA L2-1A"),
|
||||
ENTRY(0x27B6, 0x1943, 0x10de, "NVIDIA L2-2A"),
|
||||
ENTRY(0x27B6, 0x1944, 0x10de, "NVIDIA L2-3A"),
|
||||
ENTRY(0x27B6, 0x1945, 0x10de, "NVIDIA L2-4A"),
|
||||
ENTRY(0x27B6, 0x1946, 0x10de, "NVIDIA L2-6A"),
|
||||
ENTRY(0x27B6, 0x1947, 0x10de, "NVIDIA L2-8A"),
|
||||
ENTRY(0x27B6, 0x1948, 0x10de, "NVIDIA L2-12A"),
|
||||
ENTRY(0x27B6, 0x1949, 0x10de, "NVIDIA L2-24A"),
|
||||
ENTRY(0x27B6, 0x194A, 0x10de, "NVIDIA L2-1"),
|
||||
ENTRY(0x27B6, 0x194B, 0x10de, "NVIDIA L2-2"),
|
||||
ENTRY(0x27B6, 0x194C, 0x10de, "NVIDIA L2-3"),
|
||||
ENTRY(0x27B6, 0x194D, 0x10de, "NVIDIA L2-4"),
|
||||
ENTRY(0x27B6, 0x194E, 0x10de, "NVIDIA L2-6"),
|
||||
ENTRY(0x27B6, 0x194F, 0x10de, "NVIDIA L2-8"),
|
||||
ENTRY(0x27B6, 0x1950, 0x10de, "NVIDIA L2-12"),
|
||||
ENTRY(0x27B6, 0x1951, 0x10de, "NVIDIA L2-24"),
|
||||
ENTRY(0x27B6, 0x1952, 0x10de, "NVIDIA L2-4C"),
|
||||
ENTRY(0x27B6, 0x1953, 0x10de, "NVIDIA L2-6C"),
|
||||
ENTRY(0x27B6, 0x1954, 0x10de, "NVIDIA L2-8C"),
|
||||
ENTRY(0x27B6, 0x1955, 0x10de, "NVIDIA L2-12C"),
|
||||
ENTRY(0x27B6, 0x1956, 0x10de, "NVIDIA L2-24C"),
|
||||
ENTRY(0x27B8, 0x172F, 0x10de, "NVIDIA L4-1B"),
|
||||
ENTRY(0x27B8, 0x1730, 0x10de, "NVIDIA L4-2B"),
|
||||
ENTRY(0x27B8, 0x1731, 0x10de, "NVIDIA L4-1Q"),
|
||||
|
@ -18,9 +18,9 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
|
||||
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
|
||||
{ 0x20FD, 0x17F8, 0x20F5, 0x0 },
|
||||
{ 0x2324, 0x17A8, 0x2324, 0x17A6 },
|
||||
{ 0x2329, 0x198C, 0x2329, 0x198B },
|
||||
{ 0x2330, 0x16C0, 0x2330, 0x16C1 },
|
||||
{ 0x2336, 0x16C2, 0x2330, 0x16C1 },
|
||||
{ 0x2342, 0x1809, 0x2342, 0x1805 },
|
||||
};
|
||||
|
||||
for (NvU32 i = 0; i < (sizeof(vgpu_aliases) / sizeof(struct vgpu_alias_details)); ++i) {
|
||||
@ -136,6 +136,13 @@ static const struct {
|
||||
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1280}, // NVIDIA H800XM-3-40C
|
||||
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1281}, // NVIDIA H800XM-4-40C
|
||||
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1282}, // NVIDIA H800XM-7-80C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1397}, // NVIDIA H20-1-12CME
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1398}, // NVIDIA H20-1-12C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1399}, // NVIDIA H20-1-24C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1400}, // NVIDIA H20-2-24C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1401}, // NVIDIA H20-3-48C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1402}, // NVIDIA H20-4-48C
|
||||
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1403}, // NVIDIA H20-7-96C
|
||||
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
|
||||
{0x233610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
|
||||
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1131}, // NVIDIA H100XM-1-10C
|
||||
@ -178,13 +185,13 @@ static const struct {
|
||||
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1079}, // NVIDIA H800L-3-47C
|
||||
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1080}, // NVIDIA H800L-4-47C
|
||||
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1081}, // NVIDIA H800L-7-94C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA H100GL-1-12CME
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA H100GL-1-12C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA H100GL-1-24C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA H100GL-2-24C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA H100GL-3-48C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA H100GL-4-48C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA H100GL-7-96C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA GH200-1-12CME
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA GH200-1-12C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA GH200-1-24C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA GH200-2-24C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA GH200-3-48C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA GH200-4-48C
|
||||
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA GH200-7-96C
|
||||
|
||||
};
|
||||
#endif // GENERATE_vgpuSmcTypeIdMappings
|
||||
|
@ -2397,17 +2397,19 @@ NvBool nvHdmiFrlQueryConfig(
|
||||
NvU8 *pHdmiFrlBpc,
|
||||
NVDscInfoEvoRec *pDscInfo)
|
||||
{
|
||||
// Try first with 10 BPC
|
||||
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
|
||||
pModeTimings,
|
||||
pHwTimings,
|
||||
b2Heads1Or,
|
||||
pValidationParams,
|
||||
HDMI_BPC10,
|
||||
pConfig,
|
||||
pHdmiFrlBpc,
|
||||
pDscInfo)) {
|
||||
return TRUE;
|
||||
if (nvDpyIsHdmiDepth30Evo(pDpyEvo)) {
|
||||
// Try first with 10 BPC
|
||||
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
|
||||
pModeTimings,
|
||||
pHwTimings,
|
||||
b2Heads1Or,
|
||||
pValidationParams,
|
||||
HDMI_BPC10,
|
||||
pConfig,
|
||||
pHdmiFrlBpc,
|
||||
pDscInfo)) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
// Try again with 8 BPC
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -223,7 +223,8 @@ typedef struct
|
||||
|
||||
#define GSP_FW_HEAP_FREE_LIST_MAGIC 0x4845415046524545ULL
|
||||
|
||||
#define GSP_FW_FLAGS 8:0
|
||||
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
|
||||
#define GSP_FW_FLAGS 8:0
|
||||
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
|
||||
#define GSP_FW_FLAGS_RECOVERY_MARGIN_PRESENT NVBIT(1)
|
||||
|
||||
#endif // GSP_FW_WPR_META_H_
|
||||
|
160
src/nvidia/arch/nvalloc/common/inc/spdm/rmspdmrsakeys.h
Normal file
160
src/nvidia/arch/nvalloc/common/inc/spdm/rmspdmrsakeys.h
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*!
|
||||
* @file rmspdmrsakeys.h
|
||||
* @brief SPDM Interfaces - RSA keys
|
||||
*/
|
||||
|
||||
#ifndef _RM_SPDM_RSA_KEYS_H_
|
||||
#define _RM_SPDM_RSA_KEYS_H_
|
||||
|
||||
//
|
||||
// Note !!
|
||||
// All key components must be in big-endian (network) order.
// If any of these key components change, the GSP/RM code needs to be rebuilt as well.
|
||||
//
|
||||
#ifndef USE_MBEDTLS
|
||||
|
||||
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
|
||||
{
|
||||
0xcd, 0x65, 0xd2, 0xca, 0x74, 0xd1, 0x76, 0x76, 0x9f, 0x18, 0x23, 0x64, 0x55, 0x84, 0x98, 0x60,
|
||||
0x71, 0xeb, 0x42, 0x6d, 0xa4, 0x98, 0xf9, 0x92, 0xea, 0x83, 0x5b, 0x9b, 0xe2, 0x66, 0x8b, 0x43,
|
||||
0x14, 0x4d, 0x5a, 0xb8, 0xdb, 0x68, 0x2b, 0xfa, 0x5f, 0xdf, 0x7f, 0xf4, 0xfd, 0x5e, 0x42, 0x34,
|
||||
0x09, 0x98, 0xa1, 0x21, 0x98, 0x4c, 0x8d, 0xbc, 0x99, 0xdb, 0xea, 0xc1, 0xe3, 0x42, 0xe6, 0x67,
|
||||
0x26, 0x86, 0x2c, 0xd0, 0xdb, 0xf3, 0x9c, 0x12, 0xad, 0xb3, 0x82, 0x93, 0x9c, 0xb9, 0xae, 0x98,
|
||||
0x82, 0xeb, 0x59, 0xb6, 0x5c, 0x09, 0x9f, 0xa6, 0x15, 0x30, 0xa0, 0xc6, 0x77, 0xd5, 0xae, 0xa2,
|
||||
0x91, 0x65, 0x24, 0xc3, 0x7d, 0x9b, 0xa4, 0x2c, 0x31, 0x73, 0x41, 0x26, 0x72, 0xe7, 0x2f, 0xb8,
|
||||
0x60, 0xd8, 0xce, 0xb8, 0xd8, 0x4b, 0x90, 0x6c, 0xa3, 0x19, 0x7e, 0x2b, 0xd5, 0xf6, 0x05, 0x8a,
|
||||
0x2b, 0xb9, 0x9e, 0x27, 0xba, 0x2e, 0x16, 0x81, 0x9a, 0x9e, 0xf5, 0x6c, 0x38, 0x0a, 0x01, 0xea,
|
||||
0xd6, 0xe1, 0xa4, 0x83, 0x76, 0xd0, 0x68, 0xbb, 0x90, 0x63, 0xce, 0x1c, 0x8c, 0x6f, 0x0f, 0x6b,
|
||||
0x65, 0x5b, 0x12, 0xe2, 0x92, 0x60, 0x79, 0x88, 0x99, 0x5e, 0x17, 0x89, 0x4d, 0x54, 0xb1, 0x87,
|
||||
0x9c, 0xe6, 0x6d, 0x3c, 0x56, 0x1c, 0x3a, 0x3e, 0xb9, 0x16, 0x2c, 0xc8, 0xda, 0x1a, 0xfb, 0x5c,
|
||||
0xd7, 0x16, 0x3c, 0x74, 0xda, 0x78, 0xf1, 0x53, 0xd7, 0x66, 0x97, 0x8a, 0x57, 0x0b, 0x86, 0x57,
|
||||
0x2c, 0x45, 0xa1, 0x5e, 0xae, 0x39, 0x39, 0xde, 0xe0, 0x40, 0x6f, 0xdb, 0x4c, 0xd8, 0xc1, 0x8c,
|
||||
0xdb, 0xce, 0xea, 0x05, 0xc0, 0xbc, 0x89, 0x72, 0x15, 0xfd, 0xbf, 0xb2, 0xb7, 0xf2, 0x5d, 0x05,
|
||||
0xdd, 0x2b, 0x53, 0xa4, 0x03, 0x1e, 0x11, 0x67, 0xa7, 0x0f, 0x87, 0xfb, 0x57, 0x48, 0x91, 0xc8,
|
||||
0x02, 0xb7, 0x46, 0x3d, 0x82, 0xcd, 0x06, 0x4a, 0x79, 0x0f, 0xa5, 0x8a, 0xac, 0xfb, 0xb7, 0xc2,
|
||||
0xf0, 0x95, 0x19, 0x4c, 0x78, 0x7a, 0xc7, 0xd7, 0x70, 0xee, 0x6e, 0x59, 0xaf, 0x51, 0x9b, 0x11,
|
||||
0x03, 0xd3, 0x56, 0xb3, 0x05, 0x5a, 0xbb, 0x1f, 0xbd, 0xc2, 0x0e, 0x89, 0x77, 0xb4, 0xc1, 0x02,
|
||||
0xf9, 0x97, 0x56, 0x07, 0x2e, 0x4e, 0x2d, 0x01, 0x73, 0x89, 0x7d, 0xf3, 0xc9, 0x8c, 0x88, 0x2c,
|
||||
0x79, 0xd9, 0x47, 0x34, 0x9e, 0x32, 0x51, 0xd4, 0xa6, 0x7e, 0xd1, 0x08, 0xda, 0xc0, 0x76, 0x24,
|
||||
0x8e, 0x25, 0x73, 0x14, 0x30, 0xd2, 0x17, 0x37, 0xbc, 0xe0, 0x3e, 0xa2, 0x47, 0xff, 0xe2, 0x4e,
|
||||
0x9b, 0x31, 0x6c, 0xe6, 0x54, 0xaf, 0x62, 0x3a, 0xcd, 0xfa, 0x2f, 0xaf, 0x73, 0x2e, 0x73, 0x4a,
|
||||
0x3a, 0x60, 0xa8, 0xa9, 0xfc, 0x77, 0xb2, 0x57, 0xdd, 0x3a, 0xfa, 0xce, 0x35, 0xc3, 0xea, 0xa9
|
||||
};
|
||||
|
||||
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x00, 0x01, 0x00, 0x01};
|
||||
|
||||
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
|
||||
{
|
||||
0x04, 0x85, 0xc0, 0x6d, 0x6a, 0xc3, 0x0d, 0xeb, 0xb0, 0xb7, 0x14, 0x58, 0x6a, 0x35, 0xa0, 0x31,
|
||||
0x47, 0x70, 0xd9, 0xa6, 0x96, 0x60, 0x33, 0xe6, 0x93, 0x39, 0x4f, 0x34, 0x10, 0x79, 0x17, 0x89,
|
||||
0xe3, 0x91, 0x8c, 0x74, 0xd3, 0x87, 0xe8, 0xa7, 0xfb, 0xa2, 0x6d, 0x2b, 0xd4, 0xc4, 0x55, 0x8a,
|
||||
0xd5, 0xc7, 0x41, 0x8c, 0xfe, 0xd0, 0x78, 0xb2, 0x75, 0x64, 0xcd, 0x37, 0x75, 0xac, 0x8c, 0x6c,
|
||||
0x2b, 0x01, 0xfb, 0x4c, 0xa1, 0xfb, 0x2c, 0x2d, 0x91, 0x1b, 0x89, 0xbd, 0x7c, 0x4e, 0xee, 0x54,
|
||||
0x37, 0x55, 0xbd, 0x44, 0xf1, 0xa3, 0xd8, 0x9b, 0x0c, 0x8e, 0x64, 0x8b, 0xda, 0x29, 0x9f, 0x4d,
|
||||
0xbf, 0x0b, 0xce, 0x12, 0x6b, 0xda, 0x98, 0x73, 0xcc, 0xa6, 0xf0, 0x01, 0x4b, 0xb8, 0x61, 0x69,
|
||||
0x9a, 0xe0, 0x4b, 0x34, 0xf0, 0xb6, 0x41, 0x38, 0xd3, 0x22, 0x85, 0x8f, 0xab, 0x87, 0x5e, 0x39,
|
||||
0xdb, 0x9a, 0x9b, 0xab, 0xde, 0x42, 0x93, 0x86, 0x49, 0x04, 0x44, 0xfd, 0x1c, 0x02, 0xc4, 0x66,
|
||||
0x0a, 0x53, 0x2f, 0x8f, 0x21, 0x77, 0x97, 0x46, 0xc1, 0xf8, 0x20, 0x9a, 0xaa, 0x50, 0xeb, 0xb2,
|
||||
0xfe, 0xa9, 0x51, 0xb2, 0x21, 0x6c, 0xf4, 0x60, 0x18, 0x98, 0x64, 0xc6, 0x46, 0x28, 0x9e, 0x3c,
|
||||
0x11, 0x2d, 0x55, 0xac, 0x65, 0x6c, 0xfb, 0xaf, 0x53, 0xdd, 0xf4, 0x20, 0x7c, 0x04, 0xea, 0x11,
|
||||
0xe8, 0x25, 0x65, 0x15, 0x32, 0x8e, 0x08, 0x84, 0xc7, 0x41, 0x13, 0x58, 0x73, 0x61, 0x63, 0x1f,
|
||||
0xef, 0x30, 0x1d, 0x3d, 0x36, 0x08, 0x3a, 0xb2, 0xf7, 0x25, 0x56, 0xfc, 0x03, 0x78, 0xb5, 0x45,
|
||||
0xf4, 0x60, 0x47, 0x95, 0x1c, 0x6a, 0x2b, 0xfa, 0xe3, 0x1c, 0x2f, 0x0d, 0x2a, 0x90, 0x43, 0x65,
|
||||
0x09, 0xbe, 0x63, 0x71, 0x33, 0xd3, 0x29, 0x31, 0xd5, 0x29, 0x26, 0xa3, 0x15, 0xc2, 0x46, 0x70,
|
||||
0xb4, 0x3e, 0x23, 0xaf, 0xb6, 0xfb, 0x87, 0x8f, 0x39, 0xf0, 0xd2, 0x3e, 0x35, 0x3c, 0xec, 0x7a,
|
||||
0xd2, 0x0f, 0xd8, 0xa4, 0x0c, 0x19, 0xc8, 0xee, 0x47, 0x7c, 0x1e, 0xd6, 0x67, 0x31, 0xe2, 0x9d,
|
||||
0xc0, 0x65, 0x64, 0x60, 0xe7, 0xd2, 0xeb, 0xe1, 0x02, 0xd5, 0x92, 0x7c, 0x51, 0xf1, 0x3b, 0x12,
|
||||
0x00, 0x65, 0xfd, 0x2b, 0x13, 0x15, 0xfa, 0x6d, 0x99, 0x1d, 0xd3, 0x03, 0x77, 0xb1, 0xb0, 0xf0,
|
||||
0x39, 0x7c, 0x27, 0x13, 0x30, 0xba, 0xff, 0x4d, 0x2e, 0xda, 0xe0, 0x37, 0xad, 0xf4, 0x49, 0x0a,
|
||||
0xdd, 0x1e, 0x87, 0x8c, 0xc9, 0x6b, 0xf8, 0xc6, 0xb3, 0x05, 0xeb, 0x6c, 0x5f, 0x84, 0x64, 0x62,
|
||||
0x1c, 0xf6, 0x04, 0x6f, 0xd7, 0xa9, 0xbc, 0x22, 0x97, 0xdb, 0x8d, 0xa5, 0xe1, 0x3a, 0x5c, 0x0d,
|
||||
0x7b, 0x78, 0x25, 0x98, 0x04, 0x7f, 0x2b, 0x59, 0x5b, 0x7c, 0xf7, 0x73, 0x37, 0x7f, 0x92, 0x8d
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
|
||||
{
|
||||
0xa9, 0xea, 0xc3, 0x35, 0xce, 0xfa, 0x3a, 0xdd, 0x57, 0xb2, 0x77, 0xfc, 0xa9, 0xa8, 0x60, 0x3a,
|
||||
0x4a, 0x73, 0x2e, 0x73, 0xaf, 0x2f, 0xfa, 0xcd, 0x3a, 0x62, 0xaf, 0x54, 0xe6, 0x6c, 0x31, 0x9b,
|
||||
0x4e, 0xe2, 0xff, 0x47, 0xa2, 0x3e, 0xe0, 0xbc, 0x37, 0x17, 0xd2, 0x30, 0x14, 0x73, 0x25, 0x8e,
|
||||
0x24, 0x76, 0xc0, 0xda, 0x08, 0xd1, 0x7e, 0xa6, 0xd4, 0x51, 0x32, 0x9e, 0x34, 0x47, 0xd9, 0x79,
|
||||
0x2c, 0x88, 0x8c, 0xc9, 0xf3, 0x7d, 0x89, 0x73, 0x01, 0x2d, 0x4e, 0x2e, 0x07, 0x56, 0x97, 0xf9,
|
||||
0x02, 0xc1, 0xb4, 0x77, 0x89, 0x0e, 0xc2, 0xbd, 0x1f, 0xbb, 0x5a, 0x05, 0xb3, 0x56, 0xd3, 0x03,
|
||||
0x11, 0x9b, 0x51, 0xaf, 0x59, 0x6e, 0xee, 0x70, 0xd7, 0xc7, 0x7a, 0x78, 0x4c, 0x19, 0x95, 0xf0,
|
||||
0xc2, 0xb7, 0xfb, 0xac, 0x8a, 0xa5, 0x0f, 0x79, 0x4a, 0x06, 0xcd, 0x82, 0x3d, 0x46, 0xb7, 0x02,
|
||||
0xc8, 0x91, 0x48, 0x57, 0xfb, 0x87, 0x0f, 0xa7, 0x67, 0x11, 0x1e, 0x03, 0xa4, 0x53, 0x2b, 0xdd,
|
||||
0x05, 0x5d, 0xf2, 0xb7, 0xb2, 0xbf, 0xfd, 0x15, 0x72, 0x89, 0xbc, 0xc0, 0x05, 0xea, 0xce, 0xdb,
|
||||
0x8c, 0xc1, 0xd8, 0x4c, 0xdb, 0x6f, 0x40, 0xe0, 0xde, 0x39, 0x39, 0xae, 0x5e, 0xa1, 0x45, 0x2c,
|
||||
0x57, 0x86, 0x0b, 0x57, 0x8a, 0x97, 0x66, 0xd7, 0x53, 0xf1, 0x78, 0xda, 0x74, 0x3c, 0x16, 0xd7,
|
||||
0x5c, 0xfb, 0x1a, 0xda, 0xc8, 0x2c, 0x16, 0xb9, 0x3e, 0x3a, 0x1c, 0x56, 0x3c, 0x6d, 0xe6, 0x9c,
|
||||
0x87, 0xb1, 0x54, 0x4d, 0x89, 0x17, 0x5e, 0x99, 0x88, 0x79, 0x60, 0x92, 0xe2, 0x12, 0x5b, 0x65,
|
||||
0x6b, 0x0f, 0x6f, 0x8c, 0x1c, 0xce, 0x63, 0x90, 0xbb, 0x68, 0xd0, 0x76, 0x83, 0xa4, 0xe1, 0xd6,
|
||||
0xea, 0x01, 0x0a, 0x38, 0x6c, 0xf5, 0x9e, 0x9a, 0x81, 0x16, 0x2e, 0xba, 0x27, 0x9e, 0xb9, 0x2b,
|
||||
0x8a, 0x05, 0xf6, 0xd5, 0x2b, 0x7e, 0x19, 0xa3, 0x6c, 0x90, 0x4b, 0xd8, 0xb8, 0xce, 0xd8, 0x60,
|
||||
0xb8, 0x2f, 0xe7, 0x72, 0x26, 0x41, 0x73, 0x31, 0x2c, 0xa4, 0x9b, 0x7d, 0xc3, 0x24, 0x65, 0x91,
|
||||
0xa2, 0xae, 0xd5, 0x77, 0xc6, 0xa0, 0x30, 0x15, 0xa6, 0x9f, 0x09, 0x5c, 0xb6, 0x59, 0xeb, 0x82,
|
||||
0x98, 0xae, 0xb9, 0x9c, 0x93, 0x82, 0xb3, 0xad, 0x12, 0x9c, 0xf3, 0xdb, 0xd0, 0x2c, 0x86, 0x26,
|
||||
0x67, 0xe6, 0x42, 0xe3, 0xc1, 0xea, 0xdb, 0x99, 0xbc, 0x8d, 0x4c, 0x98, 0x21, 0xa1, 0x98, 0x09,
|
||||
0x34, 0x42, 0x5e, 0xfd, 0xf4, 0x7f, 0xdf, 0x5f, 0xfa, 0x2b, 0x68, 0xdb, 0xb8, 0x5a, 0x4d, 0x14,
|
||||
0x43, 0x8b, 0x66, 0xe2, 0x9b, 0x5b, 0x83, 0xea, 0x92, 0xf9, 0x98, 0xa4, 0x6d, 0x42, 0xeb, 0x71,
|
||||
0x60, 0x98, 0x84, 0x55, 0x64, 0x23, 0x18, 0x9f, 0x76, 0x76, 0xd1, 0x74, 0xca, 0xd2, 0x65, 0xcd,
|
||||
0x00
|
||||
};
|
||||
|
||||
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x01, 0x00, 0x01, 0x00};
|
||||
|
||||
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
|
||||
{
|
||||
0x8d, 0x92, 0x7f, 0x37, 0x73, 0xf7, 0x7c, 0x5b, 0x59, 0x2b, 0x7f, 0x04, 0x98, 0x25, 0x78, 0x7b,
|
||||
0x0d, 0x5c, 0x3a, 0xe1, 0xa5, 0x8d, 0xdb, 0x97, 0x22, 0xbc, 0xa9, 0xd7, 0x6f, 0x04, 0xf6, 0x1c,
|
||||
0x62, 0x64, 0x84, 0x5f, 0x6c, 0xeb, 0x05, 0xb3, 0xc6, 0xf8, 0x6b, 0xc9, 0x8c, 0x87, 0x1e, 0xdd,
|
||||
0x0a, 0x49, 0xf4, 0xad, 0x37, 0xe0, 0xda, 0x2e, 0x4d, 0xff, 0xba, 0x30, 0x13, 0x27, 0x7c, 0x39,
|
||||
0xf0, 0xb0, 0xb1, 0x77, 0x03, 0xd3, 0x1d, 0x99, 0x6d, 0xfa, 0x15, 0x13, 0x2b, 0xfd, 0x65, 0x00,
|
||||
0x12, 0x3b, 0xf1, 0x51, 0x7c, 0x92, 0xd5, 0x02, 0xe1, 0xeb, 0xd2, 0xe7, 0x60, 0x64, 0x65, 0xc0,
|
||||
0x9d, 0xe2, 0x31, 0x67, 0xd6, 0x1e, 0x7c, 0x47, 0xee, 0xc8, 0x19, 0x0c, 0xa4, 0xd8, 0x0f, 0xd2,
|
||||
0x7a, 0xec, 0x3c, 0x35, 0x3e, 0xd2, 0xf0, 0x39, 0x8f, 0x87, 0xfb, 0xb6, 0xaf, 0x23, 0x3e, 0xb4,
|
||||
0x70, 0x46, 0xc2, 0x15, 0xa3, 0x26, 0x29, 0xd5, 0x31, 0x29, 0xd3, 0x33, 0x71, 0x63, 0xbe, 0x09,
|
||||
0x65, 0x43, 0x90, 0x2a, 0x0d, 0x2f, 0x1c, 0xe3, 0xfa, 0x2b, 0x6a, 0x1c, 0x95, 0x47, 0x60, 0xf4,
|
||||
0x45, 0xb5, 0x78, 0x03, 0xfc, 0x56, 0x25, 0xf7, 0xb2, 0x3a, 0x08, 0x36, 0x3d, 0x1d, 0x30, 0xef,
|
||||
0x1f, 0x63, 0x61, 0x73, 0x58, 0x13, 0x41, 0xc7, 0x84, 0x08, 0x8e, 0x32, 0x15, 0x65, 0x25, 0xe8,
|
||||
0x11, 0xea, 0x04, 0x7c, 0x20, 0xf4, 0xdd, 0x53, 0xaf, 0xfb, 0x6c, 0x65, 0xac, 0x55, 0x2d, 0x11,
|
||||
0x3c, 0x9e, 0x28, 0x46, 0xc6, 0x64, 0x98, 0x18, 0x60, 0xf4, 0x6c, 0x21, 0xb2, 0x51, 0xa9, 0xfe,
|
||||
0xb2, 0xeb, 0x50, 0xaa, 0x9a, 0x20, 0xf8, 0xc1, 0x46, 0x97, 0x77, 0x21, 0x8f, 0x2f, 0x53, 0x0a,
|
||||
0x66, 0xc4, 0x02, 0x1c, 0xfd, 0x44, 0x04, 0x49, 0x86, 0x93, 0x42, 0xde, 0xab, 0x9b, 0x9a, 0xdb,
|
||||
0x39, 0x5e, 0x87, 0xab, 0x8f, 0x85, 0x22, 0xd3, 0x38, 0x41, 0xb6, 0xf0, 0x34, 0x4b, 0xe0, 0x9a,
|
||||
0x69, 0x61, 0xb8, 0x4b, 0x01, 0xf0, 0xa6, 0xcc, 0x73, 0x98, 0xda, 0x6b, 0x12, 0xce, 0x0b, 0xbf,
|
||||
0x4d, 0x9f, 0x29, 0xda, 0x8b, 0x64, 0x8e, 0x0c, 0x9b, 0xd8, 0xa3, 0xf1, 0x44, 0xbd, 0x55, 0x37,
|
||||
0x54, 0xee, 0x4e, 0x7c, 0xbd, 0x89, 0x1b, 0x91, 0x2d, 0x2c, 0xfb, 0xa1, 0x4c, 0xfb, 0x01, 0x2b,
|
||||
0x6c, 0x8c, 0xac, 0x75, 0x37, 0xcd, 0x64, 0x75, 0xb2, 0x78, 0xd0, 0xfe, 0x8c, 0x41, 0xc7, 0xd5,
|
||||
0x8a, 0x55, 0xc4, 0xd4, 0x2b, 0x6d, 0xa2, 0xfb, 0xa7, 0xe8, 0x87, 0xd3, 0x74, 0x8c, 0x91, 0xe3,
|
||||
0x89, 0x17, 0x79, 0x10, 0x34, 0x4f, 0x39, 0x93, 0xe6, 0x33, 0x60, 0x96, 0xa6, 0xd9, 0x70, 0x47,
|
||||
0x31, 0xa0, 0x35, 0x6a, 0x58, 0x14, 0xb7, 0xb0, 0xeb, 0x0d, 0xc3, 0x6a, 0x6d, 0xc0, 0x85, 0x04
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _RM_SPDM_RSA_KEYS_H_
|
@ -839,6 +839,45 @@
|
||||
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
|
||||
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)

/*
 * Option: NVreg_ImexChannelCount
 *
 * Description:
 *
 * This option allows users to specify the number of IMEX (import/export)
 * channels. Within an IMEX domain, the channels allow sharing memory
 * securely in a multi-user environment using the CUDA driver's fabric handle
 * based APIs.
 *
 * An IMEX domain is either an OS instance or a group of securely
 * connected OS instances using the NVIDIA IMEX daemon. The option must
 * be set to the same value on each OS instance within the IMEX domain.
 *
 * An IMEX channel is a logical entity that is represented by a /dev node.
 * The IMEX channels are global resources within the IMEX domain. When
 * exporter and importer CUDA processes have been granted access to the
 * same IMEX channel, they can securely share memory.
 *
 * Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
 * the related CUDA APIs will fail with an insufficient permission error until
 * the /dev nodes are set up. The creation of these /dev nodes,
 * /dev/nvidia-caps-imex-channels/channelN, must be handled by the
 * administrator, where N is the minor number. The major number can be
 * queried from /proc/devices.
 *
 * nvidia-modprobe CLI support is available to set up the /dev nodes.
 * NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
 * and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
 *
 * Possible values:
 *  0 - Disable IMEX using CUDA driver's fabric handles.
 *  N - N IMEX channels will be enabled in the driver to facilitate N
 *      concurrent users. Default value is 2048 channels, and the current
 *      maximum value is 20-bit, same as Linux dev_t's minor number limit.
 */
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
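Later in this change, the OS interface gains os_imex_channel_count() and os_imex_channel_get() (with osImexChannel* wrappers on the RM side) to expose these channels. As a rough illustration of how a caller might gate a fabric-handle request on them, consider the sketch below; the helper name, the error codes chosen, and the validation policy are assumptions, not driver code.

```c
// Sketch only: reject requests when IMEX is disabled or the caller's channel
// is out of range.  'descriptor' is an OS-specific handle for the requesting
// /dev/nvidia-caps-imex-channels/channelN node.
static NV_STATUS rmValidateImexChannelSketch(NvU64 descriptor)
{
    NvS32 count = os_imex_channel_count();   // 0 when NVreg_ImexChannelCount=0 (IMEX disabled)
    NvS32 channel;

    if (count <= 0)
        return NV_ERR_NOT_SUPPORTED;

    channel = os_imex_channel_get(descriptor);
    if ((channel < 0) || (channel >= count))
        return NV_ERR_INSUFFICIENT_PERMISSIONS;

    return NV_OK;
}
```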
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
|
||||
|
||||
/*
|
||||
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
|
||||
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
|
||||
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
|
||||
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
|
||||
|
||||
/*
|
||||
*----------------registry database definition----------------------
|
||||
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
|
||||
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
@ -621,6 +621,14 @@ typedef enum
|
||||
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
|
||||
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
|
||||
|
||||
/*
|
||||
* For console setup by EFI GOP, the base address is BAR1.
|
||||
* For console setup by VBIOS, the base address is BAR2 + 16MB.
|
||||
*/
|
||||
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
|
||||
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
|
||||
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
|
||||
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iommus.iso_iommu_present)
|
||||
|
||||
@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
|
||||
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
|
||||
|
||||
struct dma_buf;
|
||||
typedef struct nv_dma_buf nv_dma_buf_t;
|
||||
struct drm_gem_object;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
|
||||
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
|
||||
NvBool NV_API_CALL os_is_grid_supported (void);
|
||||
NvU32 NV_API_CALL os_get_grid_csp_support (void);
|
||||
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
|
||||
void NV_API_CALL os_bug_check (NvU32, const char *);
|
||||
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
|
||||
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
|
||||
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
|
||||
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
|
||||
@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
|
||||
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
|
||||
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
|
||||
void NV_API_CALL os_nv_cap_close_fd (int);
|
||||
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
|
||||
NvS32 NV_API_CALL os_imex_channel_count (void);
|
||||
|
||||
enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
|
||||
@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
extern NvBool os_imex_channel_is_supported;
|
||||
|
||||
/*
|
||||
* ---------------------------------------------------------------------------
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -167,25 +167,11 @@ static void RmCreateOsDescriptor(NVOS32_PARAMETERS *pApi, API_SECURITY_INFO secI
|
||||
}
|
||||
else if (rmStatus == NV_ERR_INVALID_ADDRESS)
|
||||
{
|
||||
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount,
|
||||
&pPteArray, &pPageArray);
|
||||
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount, &pPteArray);
|
||||
if (rmStatus == NV_OK)
|
||||
{
|
||||
if (pPageArray != NULL)
|
||||
{
|
||||
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPageArray;
|
||||
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_PAGE_ARRAY;
|
||||
}
|
||||
else if (pPteArray != NULL)
|
||||
{
|
||||
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
|
||||
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_ASSERT_FAILED("unknown memory import type");
|
||||
rmStatus = NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
|
||||
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
|
||||
}
|
||||
}
|
||||
if (rmStatus != NV_OK)
|
||||
|
@ -1165,6 +1165,11 @@ NV_STATUS osIsVfioPciCorePresent(void)
|
||||
return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VFIO_PCI_CORE_PRESENT);
|
||||
}
|
||||
|
||||
NvU32 osGetGridCspSupport(void)
|
||||
{
|
||||
return os_get_grid_csp_support();
|
||||
}
|
||||
|
||||
void initVGXSpecificRegistry(OBJGPU *pGpu)
|
||||
{
|
||||
NvU32 data32;
|
||||
|
@ -4828,6 +4828,39 @@ osRmCapInitDescriptor
|
||||
*pCapDescriptor = NV_U64_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Checks if IMEX channel support is present.
|
||||
*/
|
||||
NvBool
|
||||
osImexChannelIsSupported(void)
|
||||
{
|
||||
return os_imex_channel_is_supported;
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Returns IMEX channel count.
|
||||
*/
|
||||
NvS32
|
||||
osImexChannelCount
|
||||
(
|
||||
void
|
||||
)
|
||||
{
|
||||
return os_imex_channel_count();
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Returns IMEX channel number.
|
||||
*
|
||||
* @param[in] descriptor OS specific descriptor to query channel number.
|
||||
*
|
||||
*/
|
||||
NvS32
|
||||
osImexChannelGet(NvU64 descriptor)
|
||||
{
|
||||
return os_imex_channel_get(descriptor);
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Generates random bytes which can be used as a universally unique
|
||||
* identifier.
|
||||
|
@ -2153,6 +2153,7 @@ static NV_STATUS RmCreateMmapContextLocked(
|
||||
RsClient *pClient = staticCast(pRmClient, RsClient);
|
||||
KernelMemorySystem *pKernelMemorySystem = NULL;
|
||||
NvBool bCoherentAtsCpuOffset = NV_FALSE;
|
||||
NvBool bSriovHostCoherentFbOffset = NV_FALSE;
|
||||
nv_state_t *pNv = NULL;
|
||||
NvU64 addr = (NvU64)address;
|
||||
NvU32 prot = 0;
|
||||
@ -2200,6 +2201,8 @@ static NV_STATUS RmCreateMmapContextLocked(
|
||||
pNv = NV_GET_NV_STATE(pGpu);
|
||||
pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
|
||||
bCoherentAtsCpuOffset = IS_COHERENT_CPU_ATS_OFFSET(pKernelMemorySystem, addr, size);
|
||||
bSriovHostCoherentFbOffset = os_is_vgx_hyper() &&
|
||||
IS_COHERENT_FB_OFFSET(pKernelMemorySystem, addr, size);
|
||||
}
|
||||
|
||||
//
|
||||
@ -2210,7 +2213,7 @@ static NV_STATUS RmCreateMmapContextLocked(
|
||||
if ((pNv == NULL) ||
|
||||
(!IS_REG_OFFSET(pNv, addr, size) &&
|
||||
!IS_FB_OFFSET(pNv, addr, size) &&
|
||||
!bCoherentAtsCpuOffset &&
|
||||
!(bCoherentAtsCpuOffset || bSriovHostCoherentFbOffset) &&
|
||||
!IS_IMEM_OFFSET(pNv, addr, size)))
|
||||
{
|
||||
pNv = nv_get_ctl_state();
|
||||
@ -2240,6 +2243,38 @@ static NV_STATUS RmCreateMmapContextLocked(
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
else if (bSriovHostCoherentFbOffset)
|
||||
{
|
||||
status = RmGetMmapPteArray(pKernelMemorySystem, pClient, hMemory, nvuap);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
goto done;
|
||||
}
|
||||
|
||||
//
|
||||
// nvuap->page_array (allocated in RmGetMmapPteArray) is not assigned
// to nvamc->page_array if the onlining status is false (which is the case with
// bSriovHostCoherentFbOffset), so it doesn't get freed unless it is freed here.
// The call to RmGetMmapPteArray is only made to get the contig flag and the
// number of pages of the allocation.
|
||||
//
|
||||
os_free_mem(nvuap->page_array);
|
||||
nvuap->page_array = NULL;
|
||||
|
||||
//
|
||||
// This path is taken in the case of self-hosted SRIOV host where
|
||||
// the coherent GPU memory is not onlined but the CPU mapping to
|
||||
// the coherent GPU memory is done via C2C(instead of BAR1) and so
|
||||
// only contig can be supported for now.
|
||||
//
|
||||
if (!nvuap->contig && (nvuap->num_pages > 1))
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Mapping of Non-contig allocation for "
|
||||
"not onlined coherent GPU memory not supported\n");
|
||||
status = NV_ERR_NOT_SUPPORTED;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (RmSetUserMapAccessRange(nvuap) != NV_OK)
|
||||
{
|
||||
@ -5377,16 +5412,11 @@ NvBool rm_get_uefi_console_status(
    NvU64 fbBaseAddress = 0;
    NvBool bConsoleDevice = NV_FALSE;

    // os_get_screen_info() will return dimensions and an address for
    // any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
    // UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
    // driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
    // it will point to BAR2 + 16MB.
    os_get_screen_info(&fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
                       nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
                       nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);

    fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
    //
    // nv_get_screen_info() will return dimensions and an address for
    // any fbdev driver (e.g., efifb, vesafb, etc).
    //
    nv_get_screen_info(nv, &fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);

    bConsoleDevice = (fbSize != 0);
@ -5403,16 +5433,11 @@ NvU64 rm_get_uefi_console_size(

    fbSize = fbWidth = fbHeight = fbDepth = fbPitch = 0;

    // os_get_screen_info() will return dimensions and an address for
    // any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
    // UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
    // driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
    // it will point to BAR2 + 16MB.
    os_get_screen_info(pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
                       nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
                       nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);

    fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
    //
    // nv_get_screen_info() will return dimensions and an address for
    // any fbdev driver (e.g., efifb, vesafb, etc).
    //
    nv_get_screen_info(nv, pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);

    return fbSize;
}
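Both console helpers drop the open-coded size derivation: previously the size was computed from the dimensions returned by os_get_screen_info(), now nv_get_screen_info() reports it directly, and a non-zero size is treated as "a console framebuffer is present". A small sketch of the arithmetic being replaced (the helper and variable names here are illustrative, not driver code):

    #include <stdbool.h>
    #include <stdint.h>

    /* Old-style derivation: total bytes = visible rows * bytes per row. */
    static uint64_t example_console_size(uint32_t fbHeight, uint32_t fbPitch)
    {
        return (uint64_t)fbHeight * (uint64_t)fbPitch;
    }

    /* Mirrors bConsoleDevice = (fbSize != 0) in rm_get_uefi_console_status(). */
    static bool example_console_present(uint64_t fbSize)
    {
        return fbSize != 0;
    }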
@ -582,13 +582,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *sp,
    return rmStatus;
}

NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
                                                               gpuDeviceHandle device)
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
                                                               gpuFaultInfo *pFaultInfo,
                                                               NvBool bCopyAndFlush)
{
    NV_STATUS rmStatus;
    void *fp;
    NV_ENTER_RM_RUNTIME(sp,fp);
    rmStatus = nvGpuOpsFlushReplayableFaultBuffer(device);
    rmStatus = nvGpuOpsFlushReplayableFaultBuffer(pFaultInfo, bCopyAndFlush);
    NV_EXIT_RM_RUNTIME(sp,fp);
    return rmStatus;
}
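The flush entry point now takes the fault-buffer state plus a bCopyAndFlush flag instead of a device handle. The flag's semantics are not spelled out in this hunk; the sketch below is an assumption inferred from its name (copy pending packets into a shadow buffer before discarding the hardware buffer), using simplified stand-in types:

    #include <stdbool.h>
    #include <string.h>

    typedef struct
    {
        unsigned char hw[256];      /* stand-in for the HW replayable fault buffer */
        unsigned char shadow[256];  /* stand-in for a client shadow buffer */
        size_t        valid_bytes;  /* bytes currently valid in hw[] */
    } example_fault_info_t;

    static void example_flush_replayable(example_fault_info_t *info, bool copy_and_flush)
    {
        if (copy_and_flush && info->valid_bytes > 0)
            memcpy(info->shadow, info->hw, info->valid_bytes);  /* preserve packets first */

        info->valid_bytes = 0;  /* then discard everything pending in the HW buffer */
    }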
@ -348,7 +348,7 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL

    if ((memmgrGetReservedConsoleMemDesc(pGpu, pMemoryManager) != NULL) && bContinue)
    {
        NvU64 baseAddr;
        NvU64 baseAddr, size;
        NvU32 width, height, depth, pitch;

        // There should only be one.
@ -357,9 +357,8 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL
        pParams->subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

        // Console is either mapped to BAR1 or BAR2 + 16 MB
        os_get_screen_info(&baseAddr, &width, &height, &depth, &pitch,
                           nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
                           nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
        nv_get_screen_info(nv, &baseAddr, &width, &height, &depth,
                           &pitch, &size);

        pParams->width = (NvU16)width;
        pParams->height = (NvU16)height;
@ -622,7 +622,7 @@ NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(

        // Copies all valid packets in RM's and client's shadow buffer
        status = kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, &threadState, faultsCopied,
                                        NON_REPLAYABLE_FAULT_BUFFER);
                                        NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);

        threadStateFreeISRAndDeferredIntHandler(&threadState, pGpu, THREAD_STATE_FLAGS_IS_ISR);
        tlsIsrDestroy(pIsrAllocator);
@ -659,7 +659,7 @@ static NV_STATUS _rm_gpu_copy_mmu_faults_unlocked(

    // Copies all valid packets in RM's and client's shadow buffer
    return kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, pThreadState, pFaultsCopied,
                                  NON_REPLAYABLE_FAULT_BUFFER);
                                  NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);

    return NV_OK;
}
Two file diffs were suppressed because they are too large.
@ -156,6 +156,7 @@ struct RmClient {
    PSECURITY_TOKEN pSecurityToken;
    struct UserInfo *pUserInfo;
    NvBool bIsClientVirtualMode;
    NvS32 imexChannel;
    PNODE pCliSyncGpuBoostTree;
};
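struct RmClient gains an imexChannel field, which pairs naturally with the osImexChannelGet() wrapper at the top of this diff. The binding point and the "negative means no channel" convention below are assumptions for illustration only, not the RM's actual client-construction code:

    #include <stdint.h>

    /* Stand-ins for NvS32/NvU64 and the new field added to struct RmClient. */
    typedef struct
    {
        int32_t imexChannel;
    } example_client_t;

    /* Stand-in for the driver's os_imex_channel_get()/osImexChannelGet() lookup. */
    extern int32_t example_imex_channel_lookup(uint64_t descriptor);

    static void example_bind_imex_channel(example_client_t *pClient, uint64_t descriptor)
    {
        /* Assumption: a negative value means the client has no IMEX channel. */
        pClient->imexChannel = example_imex_channel_lookup(descriptor);
    }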
@ -1136,28 +1136,6 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
        pThis->__gpuUpdateErrorContainmentState__ = &gpuUpdateErrorContainmentState_c04480;
    }

    // Hal function -- gpuCheckEccCounts
    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
    {
        pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_TU102;
    }
    // default
    else
    {
        pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_d44104;
    }

    // Hal function -- gpuClearEccCounts
    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
    {
        pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_TU102;
    }
    // default
    else
    {
        pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_ac1694;
    }

    // Hal function -- gpuWaitForGfwBootComplete
    if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */
    {
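The generated if-conditions above are a word-and-bit membership test: the HAL variant index is split into a 32-entry word select (idx >> 5) and a bit within that word (idx & 0x1f), and the literal mask encodes which variants take the specialized function pointer. A standalone sketch with simplified names:

    #include <stdbool.h>
    #include <stdint.h>

    static bool example_hal_variant_selected(uint32_t halVarIdx, uint32_t wordSelect, uint32_t mask)
    {
        /* Select the 32-bit word, then test the variant's bit against the mask. */
        uint32_t bit = (uint32_t)1 << (halVarIdx & 0x1f);
        return ((halVarIdx >> 5) == wordSelect) && ((bit & mask) != 0);
    }

    /*
     * For wordSelect 1 and mask 0x10000420 (bits 5, 10 and 28), exactly the
     * variant indices 37, 42 and 60 take the specialized assignment; every
     * other index falls through to the default implementation.
     */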
@ -7,7 +7,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2004-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -973,8 +973,6 @@ struct OBJGPU {
    NvBool (*__gpuIsDevModeEnabledInHw__)(struct OBJGPU *);
    NvBool (*__gpuIsCtxBufAllocInPmaSupported__)(struct OBJGPU *);
    NV_STATUS (*__gpuUpdateErrorContainmentState__)(struct OBJGPU *, NV_ERROR_CONT_ERR_ID, NV_ERROR_CONT_LOCATION, NvU32 *);
    void (*__gpuCheckEccCounts__)(struct OBJGPU *);
    NV_STATUS (*__gpuClearEccCounts__)(struct OBJGPU *);
    NV_STATUS (*__gpuWaitForGfwBootComplete__)(struct OBJGPU *);
    NvBool (*__gpuGetIsCmpSku__)(struct OBJGPU *);
    NvBool PDB_PROP_GPU_HIGH_SPEED_BRIDGE_CONNECTED;
@ -1236,6 +1234,7 @@ struct OBJGPU {
    NvBool bStateUnloading;
    NvBool bStateLoaded;
    NvBool bFullyConstructed;
    NvBool bRecoveryMarginPresent;
    NvBool bBf3WarBug4040336Enabled;
    NvBool bUnifiedMemorySpaceEnabled;
    NvBool bSriovEnabled;
@ -1633,10 +1632,6 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
#define gpuIsCtxBufAllocInPmaSupported_HAL(pGpu) gpuIsCtxBufAllocInPmaSupported_DISPATCH(pGpu)
#define gpuUpdateErrorContainmentState(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuUpdateErrorContainmentState_HAL(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts_HAL(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete_HAL(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuGetIsCmpSku(pGpu) gpuGetIsCmpSku_DISPATCH(pGpu)
@ -2557,6 +2552,34 @@ static inline NV_STATUS gpuSetPartitionErrorAttribution(struct OBJGPU *pGpu, NV_

#define gpuSetPartitionErrorAttribution_HAL(pGpu, arg0, arg1, arg2) gpuSetPartitionErrorAttribution(pGpu, arg0, arg1, arg2)

NV_STATUS gpuCreateRusdMemory_IMPL(struct OBJGPU *pGpu);


#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuCreateRusdMemory(struct OBJGPU *pGpu) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
    return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuCreateRusdMemory(pGpu) gpuCreateRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled

#define gpuCreateRusdMemory_HAL(pGpu) gpuCreateRusdMemory(pGpu)

NvBool gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);


#ifdef __nvoc_gpu_h_disabled
static inline NvBool gpuCheckEccCounts(struct OBJGPU *pGpu) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
    return NV_FALSE;
}
#else //__nvoc_gpu_h_disabled
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_TU102(pGpu)
#endif //__nvoc_gpu_h_disabled

#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts(pGpu)

NV_STATUS gpuConstructDeviceInfoTable_FWCLIENT(struct OBJGPU *pGpu);

NV_STATUS gpuConstructDeviceInfoTable_VGPUSTUB(struct OBJGPU *pGpu);
@ -3147,26 +3170,6 @@ static inline NV_STATUS gpuUpdateErrorContainmentState_DISPATCH(struct OBJGPU *p
    return pGpu->__gpuUpdateErrorContainmentState__(pGpu, arg0, arg1, arg2);
}

static inline void gpuCheckEccCounts_d44104(struct OBJGPU *pGpu) {
    return;
}

void gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);

static inline void gpuCheckEccCounts_DISPATCH(struct OBJGPU *pGpu) {
    pGpu->__gpuCheckEccCounts__(pGpu);
}

static inline NV_STATUS gpuClearEccCounts_ac1694(struct OBJGPU *pGpu) {
    return NV_OK;
}

NV_STATUS gpuClearEccCounts_TU102(struct OBJGPU *pGpu);

static inline NV_STATUS gpuClearEccCounts_DISPATCH(struct OBJGPU *pGpu) {
    return pGpu->__gpuClearEccCounts__(pGpu);
}

NV_STATUS gpuWaitForGfwBootComplete_TU102(struct OBJGPU *pGpu);

static inline NV_STATUS gpuWaitForGfwBootComplete_5baef9(struct OBJGPU *pGpu) {
@ -4458,16 +4461,6 @@ static inline NV_STATUS gpuSanityCheckRegisterAccess(struct OBJGPU *pGpu, NvU32
#define gpuSanityCheckRegisterAccess(pGpu, addr, pRetVal) gpuSanityCheckRegisterAccess_IMPL(pGpu, addr, pRetVal)
#endif //__nvoc_gpu_h_disabled

void gpuUpdateUserSharedData_IMPL(struct OBJGPU *pGpu);

#ifdef __nvoc_gpu_h_disabled
static inline void gpuUpdateUserSharedData(struct OBJGPU *pGpu) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuUpdateUserSharedData(pGpu) gpuUpdateUserSharedData_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled

NV_STATUS gpuValidateRegOffset_IMPL(struct OBJGPU *pGpu, NvU32 arg0);

#ifdef __nvoc_gpu_h_disabled
@ -4523,6 +4516,38 @@ static inline NV_STATUS gpuGc6Exit(struct OBJGPU *pGpu, NV2080_CTRL_GC6_EXIT_PAR
#define gpuGc6Exit(pGpu, arg0) gpuGc6Exit_IMPL(pGpu, arg0)
#endif //__nvoc_gpu_h_disabled

void gpuDestroyRusdMemory_IMPL(struct OBJGPU *pGpu);

#ifdef __nvoc_gpu_h_disabled
static inline void gpuDestroyRusdMemory(struct OBJGPU *pGpu) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuDestroyRusdMemory(pGpu) gpuDestroyRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled

NV_STATUS gpuEnableAccounting_IMPL(struct OBJGPU *arg0);

#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuEnableAccounting(struct OBJGPU *arg0) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
    return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuEnableAccounting(arg0) gpuEnableAccounting_IMPL(arg0)
#endif //__nvoc_gpu_h_disabled

NV_STATUS gpuDisableAccounting_IMPL(struct OBJGPU *arg0, NvBool bForce);

#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuDisableAccounting(struct OBJGPU *arg0, NvBool bForce) {
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
    return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuDisableAccounting(arg0, bForce) gpuDisableAccounting_IMPL(arg0, bForce)
#endif //__nvoc_gpu_h_disabled

#undef PRIVATE_FIELD
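Each of the new gpu.h entries above repeats the generated __nvoc_gpu_h_disabled pattern: when the OBJGPU class is compiled out, the public name becomes an assert-and-fail inline stub, otherwise it is a macro alias for the _IMPL symbol, so call sites are identical either way. The same shape with a hypothetical function name (exampleGpuFeature is not part of the driver):

    NV_STATUS exampleGpuFeature_IMPL(struct OBJGPU *pGpu);

    #ifdef __nvoc_gpu_h_disabled
    /* Class compiled out: fail loudly at runtime instead of calling missing code. */
    static inline NV_STATUS exampleGpuFeature(struct OBJGPU *pGpu) {
        NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
        return NV_ERR_NOT_SUPPORTED;
    }
    #else //__nvoc_gpu_h_disabled
    #define exampleGpuFeature(pGpu) exampleGpuFeature_IMPL(pGpu)
    #endif //__nvoc_gpu_h_disabled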
@ -2159,6 +2159,16 @@ NV_STATUS rpcMapMemoryDma_STUB(
    return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}

// RPC:hal:CTRL_SET_VGPU_FB_USAGE - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcCtrlSetVgpuFbUsage_STUB(
    POBJGPU pGpu,
    POBJRPC pRpc,
    void *pArg3
)
{
    return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}

// RPC:hal:UNMAP_MEMORY_DMA - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcUnmapMemoryDma_STUB(
    POBJGPU pGpu,
@ -1303,17 +1303,6 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
        pThis->__kbusGetEccCounts__ = &kbusGetEccCounts_4a4dee;
    }

    // Hal function -- kbusClearEccCounts
    if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
    {
        pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_GH100;
    }
    // default
    else
    {
        pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_b3696a;
    }

    pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBus_engstateConstructEngine;

    pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelBus_engstateStatePreInitLocked;
@ -428,7 +428,6 @@ struct KernelBus {
    void (*__kbusTeardownCoherentCpuMapping__)(struct OBJGPU *, struct KernelBus *, NvBool);
    NV_STATUS (*__kbusBar1InstBlkBind__)(struct OBJGPU *, struct KernelBus *);
    NvU32 (*__kbusGetEccCounts__)(struct OBJGPU *, struct KernelBus *);
    void (*__kbusClearEccCounts__)(struct OBJGPU *, struct KernelBus *);
    NV_STATUS (*__kbusStateInitUnlocked__)(POBJGPU, struct KernelBus *);
    void (*__kbusInitMissing__)(POBJGPU, struct KernelBus *);
    NV_STATUS (*__kbusStatePreInitUnlocked__)(POBJGPU, struct KernelBus *);
@ -730,8 +729,6 @@ NV_STATUS __nvoc_objCreate_KernelBus(KernelBus**, Dynamic*, NvU32);
#define kbusBar1InstBlkBind_HAL(pGpu, pKernelBus) kbusBar1InstBlkBind_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts_HAL(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts_HAL(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusStateInitUnlocked(pGpu, pEngstate) kbusStateInitUnlocked_DISPATCH(pGpu, pEngstate)
#define kbusInitMissing(pGpu, pEngstate) kbusInitMissing_DISPATCH(pGpu, pEngstate)
#define kbusStatePreInitUnlocked(pGpu, pEngstate) kbusStatePreInitUnlocked_DISPATCH(pGpu, pEngstate)
@ -2531,16 +2528,6 @@ static inline NvU32 kbusGetEccCounts_DISPATCH(struct OBJGPU *pGpu, struct Kernel
    return pKernelBus->__kbusGetEccCounts__(pGpu, pKernelBus);
}

void kbusClearEccCounts_GH100(struct OBJGPU *pGpu, struct KernelBus *pKernelBus);

static inline void kbusClearEccCounts_b3696a(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
    return;
}

static inline void kbusClearEccCounts_DISPATCH(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
    pKernelBus->__kbusClearEccCounts__(pGpu, pKernelBus);
}

static inline NV_STATUS kbusStateInitUnlocked_DISPATCH(POBJGPU pGpu, struct KernelBus *pEngstate) {
    return pEngstate->__kbusStateInitUnlocked__(pGpu, pEngstate);
}
@ -2625,6 +2612,10 @@ static inline NvBool kbusIsBarAccessBlocked(struct KernelBus *pKernelBus) {
    return pKernelBus->bBarAccessBlocked;
}

static inline void kbusSetFlaSupported(struct KernelBus *pKernelBus, NvBool bSupported) {
    pKernelBus->bFlaSupported = bSupported;
}

void kbusDestruct_IMPL(struct KernelBus *pKernelBus);

#define __nvoc_kbusDestruct(pKernelBus) kbusDestruct_IMPL(pKernelBus)
@ -2719,6 +2710,9 @@ static inline NV_STATUS kbusIsGpuP2pAlive(struct OBJGPU *pGpu, struct KernelBus
#define kbusIsGpuP2pAlive(pGpu, pKernelBus) kbusIsGpuP2pAlive_IMPL(pGpu, pKernelBus)
#endif //__nvoc_kern_bus_h_disabled

NV_STATUS kbusUpdateRusdStatistics_IMPL(struct OBJGPU *pGpu);

#define kbusUpdateRusdStatistics(pGpu) kbusUpdateRusdStatistics_IMPL(pGpu)
void kbusDetermineBar1Force64KBMapping_IMPL(struct KernelBus *pKernelBus);

#ifdef __nvoc_kern_bus_h_disabled
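The KernelBus hunks show what removing an NVOC virtual entails: the per-object function pointer, the kbusClearEccCounts/_HAL macro aliases, the _DISPATCH inline and the funcTable assignments are all deleted together. A stand-alone sketch of that dispatch shape, with simplified stand-in types:

    struct example_bus;
    typedef void (*example_clear_fn)(struct example_bus *);

    struct example_bus
    {
        example_clear_fn clear_ecc;   /* stand-in for the __kbusClearEccCounts__ pointer */
    };

    /* Mirrors kbusClearEccCounts_DISPATCH(): forward through the per-object pointer. */
    static inline void example_clear_ecc_dispatch(struct example_bus *bus)
    {
        bus->clear_ecc(bus);
    }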
Some files were not shown because too many files have changed in this diff.