550.54.14

Bernhard Stoeckner 2024-02-23 16:37:56 +01:00
parent 91676d6628
commit 476bd34534
GPG Key ID: 7D23DC2750FAC2E1
186 changed files with 42509 additions and 37629 deletions

View File

@ -2,6 +2,12 @@
## Release 550 Entries
### [550.54.14] 2024-02-23
#### Added
- Added vGPU Host and vGPU Guest support. For vGPU Host, please refer to the README.vgpu packaged in the vGPU Host Package for more details.
### [550.40.07] 2024-01-24
#### Fixed

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.40.07.
version 550.54.14.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.40.07 driver release. This can be achieved by installing
550.54.14 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -188,7 +188,10 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.07/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.54.14/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -650,7 +653,9 @@ Subsystem Device ID.
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400E | 1FF2 10DE 18FF |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
| NVIDIA T400E | 1FF2 17AA 18FF |
| Quadro T1000 | 1FF9 |
| NVIDIA A100-SXM4-40GB | 20B0 |
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
@ -746,12 +751,15 @@ Subsystem Device ID.
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
@ -805,6 +813,7 @@ Subsystem Device ID.
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
@ -846,6 +855,7 @@ Subsystem Device ID.
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
@ -854,6 +864,7 @@ Subsystem Device ID.
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
@ -891,6 +902,10 @@ Subsystem Device ID.
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.07\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.14\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
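The Kbuild hunk above only bumps the NV_VERSION_STRING define passed on the compiler command line. As a minimal, standalone illustration of how such a `-D` string define can be consumed (the program and its fallback are illustrative, not taken from the tree):

```c
#include <stdio.h>

#ifndef NV_VERSION_STRING
#define NV_VERSION_STRING "unknown"   /* fallback when built without -D */
#endif

int main(void)
{
    /* e.g. build with: cc -DNV_VERSION_STRING='"550.54.14"' version.c */
    printf("built against driver version %s\n", NV_VERSION_STRING);
    return 0;
}
```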

View File

@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iommus.iso_iommu_present)
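The NV_IS_CONSOLE_MAPPED() addition above encodes where the console framebuffer lives: at the BAR1 base when the console was set up by the EFI GOP, or at BAR2 + 16MB when it was set up by the VBIOS. Below is a standalone sketch of that test; the struct is a simplified stand-in for nv_state_t, and the BAR index assignments (FB = BAR1, IMEM = BAR2) follow the comment in the hunk.

```c
#include <stdint.h>
#include <stdio.h>

struct bar_sketch { uint64_t cpu_address; };
struct nv_sketch  { struct bar_sketch bars[3]; };  /* [1] = FB/BAR1, [2] = IMEM/BAR2 */

static int is_console_mapped(const struct nv_sketch *nv, uint64_t addr)
{
    return addr == nv->bars[1].cpu_address ||                 /* EFI GOP console */
           addr == nv->bars[2].cpu_address + 0x1000000ULL;    /* VBIOS console   */
}

int main(void)
{
    struct nv_sketch nv = { .bars = { { 0 }, { 0xA0000000ULL }, { 0xB0000000ULL } } };
    printf("%d %d\n",
           is_console_mapped(&nv, 0xA0000000ULL),   /* 1: BAR1 base   */
           is_console_mapped(&nv, 0xB1000000ULL));  /* 1: BAR2 + 16MB */
    return 0;
}
```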
@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;

View File

@ -956,12 +956,20 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
- This function should not be called when interrupts are disabled.
Arguments:
device[IN] - Device handle associated with the gpu
pFaultInfo[IN] - information provided by RM for fault handling.
used for obtaining the device handle without locks.
bCopyAndFlush[IN] - Instructs RM to perform the flush in the Copy+Flush mode.
In this mode, RM will perform a copy of the packets from
the HW buffer to UVM's SW buffer as part of performing
the flush. This mode gives UVM the opportunity to observe
the packets contained within the HW buffer at the time
of issuing the call.
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
/*******************************************************************************
nvUvmInterfaceTogglePrefetchFaults
@ -982,7 +990,8 @@ NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
Error codes:
NV_ERR_INVALID_ARGUMENT
*/
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable);
/*******************************************************************************
nvUvmInterfaceInitAccessCntrInfo
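The nvUvmInterfaceFlushReplayableFaultBuffer() change above adds a bCopyAndFlush argument: in Copy+Flush mode RM copies the packets out of the HW buffer into UVM's SW buffer as part of the flush, so UVM can still observe them afterwards. A standalone toy model of that semantic (not the RM implementation):

```c
#include <stdio.h>
#include <string.h>

/* Flush a toy "HW" ring: always empty it, and optionally copy its packets
 * into a caller-visible "SW" buffer first (the Copy+Flush mode). */
static size_t flush_buffer(int *hw, size_t *hw_count, int *sw, int copy_and_flush)
{
    size_t copied = 0;
    if (copy_and_flush) {
        memcpy(sw, hw, *hw_count * sizeof(*hw));
        copied = *hw_count;
    }
    *hw_count = 0;
    return copied;
}

int main(void)
{
    int hw[4] = { 1, 2, 3, 4 }, sw[4] = { 0 };
    size_t hw_count = 4;
    size_t seen = flush_buffer(hw, &hw_count, sw, 1 /* copy and flush */);
    printf("observed %zu packets during flush, first = %d\n", seen, sw[0]);
    return 0;
}
```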

View File

@ -700,8 +700,10 @@ typedef struct UvmGpuInfo_tag
// local EGM properties
// NV_TRUE if EGM is enabled
NvBool egmEnabled;
// Peer ID to reach local EGM when EGM is enabled
NvU8 egmPeerId;
// EGM base address to offset in the GMMU PTE entry for EGM mappings
NvU64 egmBaseAddr;
} UvmGpuInfo;
@ -712,9 +714,10 @@ typedef struct UvmGpuFbInfo_tag
// RM regions that are not registered with PMA either.
NvU64 maxAllocatableAddress;
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU32 heapSize; // RAM in KB available for user allocations
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
NvBool bZeroFb; // Zero FB mode enabled.
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
} UvmGpuFbInfo;
typedef struct UvmGpuEccInfo_tag
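The UvmGpuFbInfo hunk above realigns the comments and adds maxVidmemPageSize; heapSize and reservedHeapSize remain in KB. A standalone sketch of the fields involved, converting the KB values to bytes (the struct is a simplified stand-in for the header's definition and the values are made up):

```c
#include <stdio.h>
#include <stdint.h>

typedef struct {
    uint32_t heapSize;          /* RAM in KB available for user allocations */
    uint32_t reservedHeapSize;  /* RAM in KB reserved for internal RM use   */
    int      bZeroFb;           /* Zero FB mode enabled                     */
    uint64_t maxVidmemPageSize; /* largest GPU page size used for vidmem    */
} FbInfoSketch;

int main(void)
{
    FbInfoSketch fb = { 8u * 1024 * 1024, 256u * 1024, 0, 2ULL << 20 };
    printf("usable heap: %llu bytes, max vidmem page: %llu bytes\n",
           ((unsigned long long)fb.heapSize - fb.reservedHeapSize) * 1024ULL,
           (unsigned long long)fb.maxVidmemPageSize);
    return 0;
}
```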

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;
/*
* ---------------------------------------------------------------------------

View File

@ -75,7 +75,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_toggle_prefetch_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);

View File

@ -96,5 +96,6 @@ NV_HEADER_PRESENCE_TESTS = \
soc/tegra/bpmp.h \
linux/sync_file.h \
linux/cc_platform.h \
asm/cpufeature.h
asm/cpufeature.h \
linux/mpi.h

View File

@ -58,7 +58,7 @@
#ifndef _UVM_H_
#define _UVM_H_
#define UVM_API_LATEST_REVISION 9
#define UVM_API_LATEST_REVISION 11
#if !defined(UVM_API_REVISION)
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
@ -297,7 +297,9 @@ NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU for which pageable memory access support is queried.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition for which
// pageable memory access support is queried.
//
// pageableMemAccess: (OUTPUT)
// Returns true (non-zero) if the GPU represented by gpuUuid supports
@ -327,6 +329,12 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
// usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
// process results in an error.
//
// After successfully registering a GPU partition, all subsequent API calls
// which take a NvProcessorUuid argument (including UvmGpuMappingAttributes),
// must use the GI partition UUID which can be obtained with
// NvRmControl(NVC637_CTRL_CMD_GET_UUID). Otherwise, if the GPU is not SMC
// capable or SMC enabled, the physical GPU UUID must be used.
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the physical GPU to register.
@ -431,7 +439,8 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to unregister.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unregister.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -489,7 +498,8 @@ NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to register.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to register.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hVaSpace.
@ -560,7 +570,9 @@ NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU whose VA space should be unregistered.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition whose VA space
// should be unregistered.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -590,7 +602,7 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// The two GPUs must be connected via PCIe. An error is returned if the GPUs are
// not connected or are connected over an interconnect different than PCIe
// (NVLink, for example).
// (NVLink or SMC partitions, for example).
//
// If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
// must support the same set of page sizes for GPU mappings.
@ -603,10 +615,12 @@ NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@ -652,10 +666,12 @@ NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuidA: (INPUT)
// UUID of GPU A.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition A.
//
// gpuUuidB: (INPUT)
// UUID of GPU B.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition B.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -700,7 +716,9 @@ NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU that the channel is associated with.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition that the channel is
// associated with.
//
// platformParams: (INPUT)
// On Linux: RM ctrl fd, hClient and hChannel.
@ -1139,11 +1157,14 @@ NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location for this VA range.
// UUID of the CPU, UUID of the physical GPU if the GPU is not SMC
// capable or SMC enabled, or the GPU instance UUID of the partition of
// the preferred location for this VA range.
//
// accessedByUuids: (INPUT)
// UUIDs of all processors that should have persistent mappings to this
// VA range.
// UUID of the CPU, UUID of the physical GPUs if the GPUs are not SMC
// capable or SMC enabled, or the GPU instance UUID of the partitions
// that should have persistent mappings to this VA range.
//
// accessedByCount: (INPUT)
// Number of elements in the accessedByUuids array.
@ -1421,7 +1442,9 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@ -1499,7 +1522,9 @@ NV_STATUS UvmMigrate(void *base,
// Length, in bytes, of the range.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if the destination processor is
@ -1576,7 +1601,9 @@ NV_STATUS UvmMigrateAsync(void *base,
// Id of the range group whose associated VA ranges have to be migrated.
//
// destinationUuid: (INPUT)
// UUID of the destination processor to migrate pages to.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID to
// migrate pages to.
//
// Error codes:
// NV_ERR_OBJECT_NOT_FOUND:
@ -1938,7 +1965,9 @@ NV_STATUS UvmMapExternalAllocation(void *base,
//
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the sparse region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the sparse
// region on.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -1995,7 +2024,9 @@ NV_STATUS UvmMapExternalSparse(void *base,
// The length of the virtual address range.
//
// gpuUuid: (INPUT)
// UUID of the GPU to unmap the VA range from.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to unmap the VA
// range from.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2062,7 +2093,9 @@ NV_STATUS UvmUnmapExternalAllocation(void *base,
// supported by the GPU.
//
// gpuUuid: (INPUT)
// UUID of the GPU to map the dynamic parallelism region on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to map the
// dynamic parallelism region on.
//
// Errors:
// NV_ERR_UVM_ADDRESS_IN_USE:
@ -2293,7 +2326,9 @@ NV_STATUS UvmDisableReadDuplication(void *base,
// Length, in bytes, of the range.
//
// preferredLocationUuid: (INPUT)
// UUID of the preferred location.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// preferred location.
//
// preferredCpuMemoryNode: (INPUT)
// Preferred CPU NUMA memory node used if preferredLocationUuid is the
@ -2469,8 +2504,9 @@ NV_STATUS UvmUnsetPreferredLocation(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor that should have pages in the the VA range
// mapped when possible.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// that should have pages in the VA range mapped when possible.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2538,8 +2574,10 @@ NV_STATUS UvmSetAccessedBy(void *base,
// Length, in bytes, of the range.
//
// accessedByUuid: (INPUT)
// UUID of the processor from which any policies set by
// UvmSetAccessedBy should be revoked for the given VA range.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID
// from which any policies set by UvmSetAccessedBy should be revoked
// for the given VA range.
//
// Errors:
// NV_ERR_INVALID_ADDRESS:
@ -2597,7 +2635,9 @@ NV_STATUS UvmUnsetAccessedBy(void *base,
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to enable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to enable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_NO_MEMORY:
@ -2633,7 +2673,9 @@ NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
//
// Arguments:
// gpuUuid: (INPUT)
// UUID of the GPU to disable software-assisted system-wide atomics on.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition to disable
// software-assisted system-wide atomics on.
//
// Error codes:
// NV_ERR_INVALID_DEVICE:
@ -2862,7 +2904,9 @@ NV_STATUS UvmDebugCountersEnable(UvmDebugSession session,
// Name of the counter in that scope.
//
// gpu: (INPUT)
// Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, or the GPU instance UUID of the partition of the scoped GPU.
// This parameter is ignored in AllGpu scopes.
//
// pCounterHandle: (OUTPUT)
// Handle to the counter address.
@ -2916,7 +2960,7 @@ NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session,
// UvmEventQueueCreate
//
// This call creates an event queue of the given size.
// No events are added in the queue till they are enabled by the user.
// No events are added in the queue until they are enabled by the user.
// Event queue data is visible to the user even after the target process dies
// if the session is active and queue is not freed.
//
@ -2967,7 +3011,7 @@ NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle,
// UvmEventQueueDestroy
//
// This call frees all interal resources associated with the queue, including
// upinning of the memory associated with that queue. Freeing user buffer is
// unpinning of the memory associated with that queue. Freeing user buffer is
// responsibility of a caller. Event queue might be also destroyed as a side
// effect of destroying a session associated with this queue.
//
@ -3151,9 +3195,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
// Using indices saves on the size of each event entry. This API provides the
// gpuIndex to gpuUuid relation to the user.
// migrated. This index maps to a corresponding physical gpu UUID in the
// gpuUuidTable. Using indices saves on the size of each event entry. This API
// provides the gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
@ -3161,9 +3205,9 @@ NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
//
// Arguments:
// gpuUuidTable: (OUTPUT)
// The return value is an array of UUIDs. The array index is the
// corresponding gpuIndex. There can be at max 32 gpus associated with
// UVM, so array size is 32.
// The return value is an array of physical GPU UUIDs. The array index
// is the corresponding gpuIndex. There can be at max 32 GPUs
// associated with UVM, so array size is 32.
//
// validCount: (OUTPUT)
// The system doesn't normally contain 32 GPUs. This field gives the
@ -3222,7 +3266,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
UvmEventQueueHandle queueHandle,
UvmEventEntry *pBuffer,
UvmEventEntry_V1 *pBuffer,
NvU64 *nEntries);
//------------------------------------------------------------------------------
@ -3418,10 +3462,15 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
//
#if UVM_API_REV_IS_AT_MOST(10)
// This is deprecated and replaced by sizeof(UvmToolsEventControlData_V1) or
// sizeof(UvmToolsEventControlData_V2).
NvLength UvmToolsGetEventControlSize(void);
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
// sizeof(UvmEventEntry_V2).
NvLength UvmToolsGetEventEntrySize(void);
#endif
NvLength UvmToolsGetNumberOfCounters(void);
@ -3436,6 +3485,12 @@ NvLength UvmToolsGetNumberOfCounters(void);
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for events or counters.
// See UvmEventEntry_V1 and UvmEventEntry_V2.
// UvmToolsEventControlData_V2::version records the entry version that
// will be generated.
//
// event_buffer: (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold at least event_buffer_size events. Gets pinned until queue is
@ -3447,10 +3502,9 @@ NvLength UvmToolsGetNumberOfCounters(void);
//
// event_control (INPUT)
// User allocated buffer. Must be page-aligned. Must be large enough to
// hold UvmToolsEventControlData (although single page-size allocation
// should be more than enough). One could call
// UvmToolsGetEventControlSize() function to find out current size of
// UvmToolsEventControlData. Gets pinned until queue is destroyed.
// hold UvmToolsEventControlData_V1 if version is UvmEventEntry_V1 or
// UvmToolsEventControlData_V2 (although single page-size allocation
// should be more than enough). Gets pinned until queue is destroyed.
//
// queue: (OUTPUT)
// Handle to the created queue.
@ -3460,22 +3514,32 @@ NvLength UvmToolsGetNumberOfCounters(void);
// Session handle does not refer to a valid session
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// One of the parameters: event_buffer, event_buffer_size, event_control
// is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES:
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate a queue of requested size. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory). Also it could not
// have been possible to create UvmToolsEventQueueDescriptor.
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate a queue of requested size. Another
// would be either event_buffer or event_control memory couldn't be
// pinned (e.g. because of OS limitation of pinnable memory). Also it
// could not have been possible to create UvmToolsEventQueueDescriptor.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#else
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
void *event_buffer,
NvLength event_buffer_size,
void *event_control,
UvmToolsEventQueueHandle *queue);
#endif
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
@ -3512,7 +3576,7 @@ NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
//------------------------------------------------------------------------------
// UvmToolsDestroyEventQueue
//
// Destroys all internal resources associated with the queue. It unpinns the
// Destroys all internal resources associated with the queue. It unpins the
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
// destroyed when corresponding session gets destroyed.
//
@ -3534,7 +3598,7 @@ NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
// UvmEventQueueEnableEvents
//
// This call enables a particular event type in the event queue. All events are
// disabled by default. Any event type is considered listed if and only if it's
// disabled by default. Any event type is considered listed if and only if its
// corresponding value is equal to 1 (in other words, bit is set). Disabled
// events listed in eventTypeFlags are going to be enabled. Enabled events and
// events not listed in eventTypeFlags are not affected by this call.
@ -3567,7 +3631,7 @@ NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
// UvmToolsEventQueueDisableEvents
//
// This call disables a particular event type in the event queue. Any event type
// is considered listed if and only if it's corresponding value is equal to 1
// is considered listed if and only if its corresponding value is equal to 1
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
// going to be disabled. Disabled events and events not listed in eventTypeFlags
// are not affected by this call.
@ -3605,7 +3669,7 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@ -3628,9 +3692,9 @@ NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
// Provided session is not valid
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
//------------------------------------------------------------------------------
@ -3641,12 +3705,12 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
//------------------------------------------------------------------------------
// UvmToolsCreateProcessorCounters
//
// Creates the counters structure for tracking per-process counters.
// Creates the counters structure for tracking per-processor counters.
// These counters are disabled by default.
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a a given buffer + offset address.
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
@ -3662,7 +3726,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// counters are destroyed.
//
// processorUuid: (INPUT)
// UUID of the resource, for which counters will provide statistic data.
// UUID of the physical GPU if the GPU is not SMC capable or SMC
// enabled, the GPU instance UUID of the partition, or the CPU UUID of
// the resource, for which counters will provide statistic data.
//
// counters: (OUTPUT)
// Handle to the created counters.
@ -3672,9 +3738,9 @@ NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session
// session handle does not refer to a valid tools session
//
// NV_ERR_INSUFFICIENT_RESOURCES
// There could be multiple reasons for this error. One would be that it's
// not possible to allocate counters structure. Another would be that
// either event_buffer or event_control memory couldn't be pinned
// There could be multiple reasons for this error. One would be that
// it's not possible to allocate counters structure. Another would be
// that either event_buffer or event_control memory couldn't be pinned
// (e.g. because of OS limitation of pinnable memory)
//
// NV_ERR_INVALID_ARGUMENT
@ -3690,7 +3756,7 @@ NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
// UvmToolsDestroyCounters
//
// Destroys all internal resources associated with this counters structure.
// It unpinns the buffer provided in UvmToolsCreate*Counters. Counters structure
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
// also gest destroyed when corresponding session is destroyed.
//
// Arguments:
@ -3711,7 +3777,7 @@ NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
// UvmToolsEnableCounters
//
// This call enables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Disabled counter types listed in
// counterTypeFlags are going to be enabled. Already enabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@ -3745,7 +3811,7 @@ NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
// UvmToolsDisableCounters
//
// This call disables certain counter types in the counters structure. Any
// counter type is considered listed if and only if it's corresponding value is
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Enabled counter types listed in
// counterTypeFlags are going to be disabled. Already disabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
@ -3890,32 +3956,72 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
// UvmToolsGetProcessorUuidTable
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains registered,
// its index in the table does not change. New registrations obtain the first
// unused index.
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
// registered, its index in the table does not change.
// Note that the index in the table corresponds to the processor ID reported
// in UvmEventEntry event records and that the table is not contiguously packed
// with non-zero UUIDs even with no GPU unregistrations.
//
// Arguments:
// session: (INPUT)
// Handle to the tools session.
//
// version: (INPUT)
// Requested version for the UUID table returned. The version must
// match the requested version of the event queue created with
// UvmToolsCreateEventQueue().
// See UvmEventEntry_V1 and UvmEventEntry_V2.
//
// table: (OUTPUT)
// Array of processor UUIDs, including the CPU's UUID which is always
// at index zero. The srcIndex and dstIndex fields of the
// UvmEventMigrationInfo struct index this array. Unused indices will
// have a UUID of zero.
// have a UUID of zero. Version UvmEventEntry_V1 only uses GPU UUIDs
// for the UUID of the physical GPU and only supports a single SMC
// partition registered per process. Version UvmEventEntry_V2 supports
// multiple SMC partitions registered per process and uses physical GPU
// UUIDs if the GPU is not SMC capable or SMC enabled and GPU instance
// UUIDs for SMC partitions.
// The table pointer can be NULL in which case, the size of the table
// needed to hold all the UUIDs is returned in 'count'.
//
// table_size: (INPUT)
// The size of the table in number of array elements. This can be
// zero if the table pointer is NULL.
//
// count: (OUTPUT)
// Set by UVM to the number of UUIDs written, including any gaps in
// the table due to unregistered GPUs.
// On output, it is set by UVM to the number of UUIDs needed to hold
// all the UUIDs, including any gaps in the table due to unregistered
// GPUs.
//
// Error codes:
// NV_ERR_INVALID_ADDRESS:
// writing to table failed.
// writing to table failed or the count pointer was invalid.
//
// NV_ERR_INVALID_ARGUMENT:
// The version is not UvmEventEntry_V1 or UvmEventEntry_V2.
// The count pointer is NULL.
// See UvmToolsEventQueueVersion.
//
// NV_WARN_MISMATCHED_TARGET:
// The kernel returned a table suitable for UvmEventEntry_V1 events.
// (i.e., the kernel is older and doesn't support UvmEventEntry_V2).
//
// NV_ERR_NO_MEMORY:
// Internal memory allocation failed.
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(10)
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
NvProcessorUuid *table,
NvLength *count);
#else
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
UvmToolsEventQueueVersion version,
NvProcessorUuid *table,
NvLength table_size,
NvLength *count);
#endif
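The updated UvmToolsGetProcessorUuidTable() documentation above allows a NULL table pointer so callers can first query the required element count, then allocate and call again. A standalone sketch of that two-pass pattern; query_uuid_table() below is a stand-in that mimics the documented behaviour, not the real API:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { unsigned char uuid[16]; } UuidSketch;

/* Stand-in for the documented behaviour: always report the needed count,
 * and only fill the table when a large-enough buffer was supplied. */
static int query_uuid_table(UuidSketch *table, size_t table_size, size_t *count)
{
    const size_t registered = 3;          /* CPU at index 0 plus two GPUs */
    if (count == NULL)
        return -1;
    *count = registered;
    if (table != NULL && table_size >= registered)
        memset(table, 0xAB, registered * sizeof(*table));
    return 0;
}

int main(void)
{
    size_t needed = 0;
    query_uuid_table(NULL, 0, &needed);            /* first pass: size only   */
    UuidSketch *table = calloc(needed, sizeof(*table));
    if (table == NULL)
        return 1;
    query_uuid_table(table, needed, &needed);      /* second pass: fill table */
    printf("UUID table holds %zu entries\n", needed);
    free(table);
    return 0;
}
```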
//------------------------------------------------------------------------------
// UvmToolsFlushEvents

View File

@ -34,16 +34,6 @@
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif
typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is

View File

@ -30,7 +30,7 @@
#include <linux/mempolicy.h>
#include <linux/mmu_notifier.h>
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
#include <linux/hmm.h>
#endif
@ -246,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
return uvm_ats_region_from_start_end(start, end);
}
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
{
@ -284,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
#if UVM_ATS_PREFETCH_SUPPORTED()
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
int ret;
NvU64 start;
NvU64 end;
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
struct hmm_range range;
uvm_page_index_t page_index;
uvm_va_block_region_t vma_region;
@ -370,6 +370,8 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
mmu_interval_notifier_remove(range.notifier);
#else
uvm_page_mask_zero(residency_mask);
#endif
return status;
@ -403,21 +405,24 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_service_type_t service_type,
uvm_ats_fault_context_t *ats_context)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
// Residency mask needs to be computed even if prefetching is disabled since
// the residency information is also needed by access counters servicing in
// uvm_ats_service_access_counters()
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
return status;
if (uvm_page_mask_empty(accessed_mask))
return status;
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
return status;
// Prefetch the entire region if none of the pages are resident on any node
// and if preferred_location is the faulting GPU.
if (ats_context->prefetch_state.has_preferred_location &&
@ -637,8 +642,18 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
ats_batch_select_residency(gpu_va_space, vma, ats_context);
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
// is just an optimization and servicing access counter migrations is still
// worthwhile even without any prefetching added. So, let servicing continue
// instead of returning early even if the prefetch computation fails.
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
// Remove pages which are already resident at the intended destination from
// the accessed_mask.
uvm_page_mask_andnot(&ats_context->accessed_mask,
&ats_context->accessed_mask,
&ats_context->prefetch_state.residency_mask);
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
NV_STATUS status;
NvU64 start = base + (subregion.first * PAGE_SIZE);
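The ATS servicing change above drops pages that are already resident at the intended destination from the accessed mask before the subregion loop (accessed &= ~resident). A standalone sketch of that filtering step, using a 64-bit word as a stand-in for uvm_page_mask_t:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t accessed = 0x00FFULL;   /* pages reported by faults/access counters   */
    uint64_t resident = 0x000FULL;   /* pages already resident at the destination  */

    uint64_t to_service = accessed & ~resident;   /* the andnot step from the diff */

    printf("pages left to service: 0x%llx\n", (unsigned long long)to_service);
    return 0;
}
```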

View File

@ -318,10 +318,11 @@ int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessor
unsigned i;
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
memcpy(buffer, "UVM-GPU-", 8);
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
return *buffer = 0;
memcpy(buffer, "UVM-GPU-", 8);
for (i = 0; i < 16; i++) {
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
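The reordering above makes format_uuid_to_buffer() validate bufferLength before writing the "UVM-GPU-" prefix, so an undersized buffer is no longer written past its first byte. A standalone, simplified sketch of the corrected ordering (only the prefix handling is modelled; the hex-digit loop is elided):

```c
#include <stdio.h>
#include <string.h>

/* Simplified stand-in: check the destination size first, then write the prefix. */
static int format_uuid_prefix(char *buffer, unsigned bufferLength)
{
    /* 8-char prefix + 32 hex digits + 4 dashes + terminating NUL */
    if (bufferLength < (8 + 16 * 2 + 4 * 1 + 1))
        return *buffer = 0;

    memcpy(buffer, "UVM-GPU-", 8);
    buffer[8] = '\0';            /* digits and dashes would follow here */
    return 8;
}

int main(void)
{
    char small[4], big[64];
    printf("%d %d\n", format_uuid_prefix(small, sizeof(small)),
                      format_uuid_prefix(big, sizeof(big)));
    return 0;
}
```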

View File

@ -151,22 +151,6 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
return NV_OK;
}
static void fix_memory_info_uuid(uvm_va_space_t *va_space, UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *gpu;
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
mem_info->uuid = gpu->parent->uuid;
break;
}
}
}
static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_GET_RM_PTES_PARAMS *params)
{
NV_STATUS status = NV_OK;
@ -197,11 +181,6 @@ static NV_STATUS test_get_rm_ptes_single_gpu(uvm_va_space_t *va_space, UVM_TEST_
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
TEST_CHECK_GOTO(uvm_uuid_eq(&memory_info.uuid, &params->gpu_uuid), done);
TEST_CHECK_GOTO((memory_info.size == params->size), done);
@ -309,11 +288,6 @@ static NV_STATUS test_get_rm_ptes_multi_gpu(uvm_va_space_t *va_space, UVM_TEST_G
if (status != NV_OK)
return status;
// TODO: Bug 4351121: RM will return the GI UUID. Replace it with the
// physical GPU UUID until the UVM user level has been updated to use
// the GI UUID.
fix_memory_info_uuid(va_space, &memory_info);
memset(&ext_mapping_info, 0, sizeof(ext_mapping_info));
memset(pte_buffer, 0, sizeof(pte_buffer));

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -314,7 +314,7 @@ static NV_STATUS uvm_suspend(void)
// interrupts in the bottom half in the future, the bottom half flush
// below will no longer be able to guarantee that all outstanding
// notifications have been handled.
uvm_gpu_access_counters_set_ignore(gpu, true);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, true);
uvm_parent_gpu_set_isr_suspended(gpu->parent, true);
@ -373,13 +373,13 @@ static NV_STATUS uvm_resume(void)
// Bring the fault buffer software state back in sync with the
// hardware state.
uvm_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_fault_buffer_resume(gpu->parent);
uvm_parent_gpu_set_isr_suspended(gpu->parent, false);
// Reenable access counter interrupt processing unless notifications
// have been set to be suppressed.
uvm_gpu_access_counters_set_ignore(gpu, false);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, false);
}
uvm_up_write(&g_uvm_global.pm.lock);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -59,6 +59,7 @@ MODULE_PARM_DESC(uvm_peer_copy, "Choose the addressing mode for peer copying, op
static void remove_gpu(uvm_gpu_t *gpu);
static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu);
static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu);
static void destroy_nvlink_peers(uvm_gpu_t *gpu);
@ -241,6 +242,8 @@ static NV_STATUS get_gpu_fb_info(uvm_gpu_t *gpu)
gpu->mem_info.max_allocatable_address = fb_info.maxAllocatableAddress;
}
gpu->mem_info.max_vidmem_page_size = fb_info.maxVidmemPageSize;
return NV_OK;
}
@ -843,11 +846,11 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (!uvm_procfs_is_enabled())
return NV_OK;
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), uvm_gpu_uuid(gpu));
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->parent->uuid);
gpu_base_dir_entry = uvm_procfs_get_gpu_base_dir();
// Create UVM-GPU-${UUID}/${sub_processor_index} directory
// Create UVM-GPU-${physical-UUID}/${sub_processor_index} directory
snprintf(gpu_dir_name, sizeof(gpu_dir_name), "%u", uvm_id_sub_processor_index(gpu->id));
gpu->procfs.dir = NV_CREATE_PROC_DIR(gpu_dir_name, gpu->parent->procfs.dir);
@ -855,7 +858,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
return NV_ERR_OPERATING_SYSTEM;
// Create symlink from ${gpu_id} to
// gpus/UVM-GPU-${UUID}/${sub_processor_index}
// UVM-GPU-${physical-UUID}/${sub_processor_index}
snprintf(symlink_name, sizeof(symlink_name), "%u", uvm_id_value(gpu->id));
snprintf(gpu_dir_name,
sizeof(gpu_dir_name),
@ -867,6 +870,16 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
if (gpu->procfs.dir_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
if (gpu->parent->smc.enabled) {
// Create symlink from UVM-GPU-${GI-UUID} to
// UVM-GPU-${physical-UUID}/${sub_processor_index}
format_uuid_to_buffer(uuid_text_buffer, sizeof(uuid_text_buffer), &gpu->uuid);
gpu->procfs.gpu_instance_uuid_symlink = proc_symlink(uuid_text_buffer, gpu_base_dir_entry, gpu_dir_name);
if (gpu->procfs.gpu_instance_uuid_symlink == NULL)
return NV_ERR_OPERATING_SYSTEM;
}
// GPU peer files are debug only
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
@ -882,6 +895,7 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
{
proc_remove(gpu->procfs.dir_peers);
proc_remove(gpu->procfs.gpu_instance_uuid_symlink);
proc_remove(gpu->procfs.dir_symlink);
proc_remove(gpu->procfs.dir);
}
@ -1038,6 +1052,7 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
NvU32 num_entries;
NvU64 va_size;
NvU64 va_per_entry;
uvm_mmu_page_table_alloc_t *tree_alloc;
status = uvm_page_tree_init(gpu,
NULL,
@ -1059,20 +1074,30 @@ static NV_STATUS configure_address_space(uvm_gpu_t *gpu)
// Make sure that RM's part of the VA is aligned to the VA covered by a
// single top level PDE.
UVM_ASSERT_MSG(gpu->parent->rm_va_base % va_per_entry == 0,
"va_base 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_base, va_per_entry);
"va_base 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_base,
va_per_entry);
UVM_ASSERT_MSG(gpu->parent->rm_va_size % va_per_entry == 0,
"va_size 0x%llx va_per_entry 0x%llx\n", gpu->parent->rm_va_size, va_per_entry);
"va_size 0x%llx va_per_entry 0x%llx\n",
gpu->parent->rm_va_size,
va_per_entry);
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->big_page.internal_size));
UVM_ASSERT(uvm_mmu_page_size_supported(&gpu->address_space_tree, gpu->mem_info.max_vidmem_page_size));
tree_alloc = uvm_page_tree_pdb(&gpu->address_space_tree);
status = uvm_rm_locked_call(nvUvmInterfaceSetPageDirectory(gpu->rm_address_space,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.address, num_entries,
uvm_page_tree_pdb(&gpu->address_space_tree)->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
tree_alloc->addr.address,
num_entries,
tree_alloc->addr.aperture == UVM_APERTURE_VID,
gpu_get_internal_pasid(gpu)));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceSetPageDirectory() failed: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
return status;
}
gpu->rm_address_space_moved_to_page_tree = true;
return NV_OK;
@ -1212,6 +1237,8 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
size_t len;
NV_STATUS status;
if (gpu->parent->smc.enabled) {
@ -1229,6 +1256,20 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
uvm_uuid_copy(&gpu->uuid, &gpu_info->uuid);
gpu->smc.swizz_id = gpu_info->smcSwizzId;
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->parent->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer + 4);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &gpu->uuid);
len = strlen(gpu->name);
snprintf(gpu->name + len,
sizeof(gpu->name) - len,
" UVM-GI-%s",
uuid_buffer + 8);
// Initialize the per-GPU procfs dirs as early as possible so that other
// parts of the driver can add files in them as part of their per-GPU init.
status = init_procfs_dirs(gpu);
@ -1338,7 +1379,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_parent_gpu_t *parent_gpu,
uvm_gpu_t **gpu_out)
{
char uuid_buffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
NV_STATUS status;
bool alloc_parent = (parent_gpu == NULL);
uvm_gpu_t *gpu = NULL;
@ -1364,13 +1404,6 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
if (alloc_parent)
fill_parent_gpu_info(parent_gpu, gpu_info);
format_uuid_to_buffer(uuid_buffer, sizeof(uuid_buffer), &parent_gpu->uuid);
snprintf(gpu->name,
sizeof(gpu->name),
"ID %u: %s",
uvm_id_value(gpu->id),
uuid_buffer);
// After this point all error clean up should be handled by remove_gpu()
if (!gpu_supports_uvm(parent_gpu)) {
@ -1432,13 +1465,25 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
if (alloc_parent) {
if (gpu->parent->smc.enabled) {
status = discover_smc_peers(gpu);
if (status != NV_OK) {
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
}
}
else if (alloc_parent) {
status = discover_nvlink_peers(gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to discover NVLINK peers: %s, GPU %s\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
// Nobody can have retained the GPU yet, since we still hold the global
// lock.
// Nobody can have retained the GPU yet, since we still hold the
// global lock.
UVM_ASSERT(uvm_gpu_retained_count(gpu) == 1);
atomic64_set(&gpu->retained_count, 0);
goto error;
@ -1686,7 +1731,7 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
for (sub_processor_index = 0; sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
for_each_sub_processor_index(sub_processor_index)
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
uvm_kvfree(parent_gpu);
@ -1915,32 +1960,25 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
return uvm_parent_gpu_get_by_uuid_locked(gpu_uuid);
}
static uvm_gpu_t *gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid)
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_gpu_id_t gpu_id;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id(gpu_id) {
uvm_gpu_t *gpu = uvm_gpu_get(gpu_id);
if (gpu) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid)) {
UVM_ASSERT(!gpu->parent->smc.enabled);
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
}
}
return NULL;
}
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
return gpu_get_by_uuid_locked(gpu_uuid);
}
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
static uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id)
{
uvm_gpu_t *gpu;
@ -1998,7 +2036,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (parent_gpu != NULL) {
// If the UUID has been seen before, and if SMC is enabled, then check
// if this specific partition has been seen previously. The UUID-based
// if this specific partition has been seen previously. The UUID-based
// look-up above may have succeeded for a different partition with the
// same parent GPU.
if (gpu_info->smcEnabled) {
@ -2287,7 +2325,7 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_ERR_OPERATING_SYSTEM;
// Create a symlink from UVM GPU UUID (UVM-GPU-...) to the UVM GPU ID gpuB
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), uvm_gpu_uuid(remote));
format_uuid_to_buffer(symlink_name, sizeof(symlink_name), &remote->uuid);
peer_caps->procfs.peer_symlink_file[local_idx] = proc_symlink(symlink_name,
local->procfs.dir_peers,
gpu_dir_name);
@ -2297,6 +2335,24 @@ static NV_STATUS init_procfs_peer_cap_files(uvm_gpu_t *local, uvm_gpu_t *remote,
return NV_OK;
}
static NV_STATUS init_procfs_peer_files(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
NV_STATUS status;
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
return NV_OK;
}
static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_gpu_t *gpu1,
const UvmGpuP2PCapsParams *p2p_caps_params,
@ -2377,16 +2433,41 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
}
if (!uvm_procfs_is_debug_enabled())
return NV_OK;
return init_procfs_peer_files(gpu0, gpu1);
}
status = init_procfs_peer_cap_files(gpu0, gpu1, 0);
if (status != NV_OK)
return status;
static NV_STATUS discover_smc_peers(uvm_gpu_t *gpu)
{
NvU32 sub_processor_index;
uvm_gpu_t *other_gpu;
NV_STATUS status;
status = init_procfs_peer_cap_files(gpu1, gpu0, 1);
if (status != NV_OK)
return status;
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(gpu->parent->smc.enabled);
for_each_sub_processor_index(sub_processor_index) {
uvm_gpu_peer_t *peer_caps;
other_gpu = gpu->parent->gpus[sub_processor_index];
if (!other_gpu || other_gpu == gpu)
continue;
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->ref_count == 1)
continue;
UVM_ASSERT(peer_caps->ref_count == 0);
memset(peer_caps, 0, sizeof(*peer_caps));
peer_caps->ref_count = 1;
status = init_procfs_peer_files(gpu, other_gpu);
if (status != NV_OK) {
peer_caps->ref_count = 0;
return status;
}
}
return NV_OK;
}
@ -2489,9 +2570,7 @@ static NV_STATUS discover_nvlink_peers(uvm_gpu_t *gpu)
UVM_ASSERT(gpu);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (gpu->parent->smc.enabled)
return NV_OK;
UVM_ASSERT(!gpu->parent->smc.enabled);
for_each_gpu(other_gpu) {
UvmGpuP2PCapsParams p2p_caps_params;
@ -2592,10 +2671,6 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
UVM_ASSERT(gpu0);
UVM_ASSERT(gpu1);
// P2P is not supported under SMC partitioning
UVM_ASSERT(!gpu0->parent->smc.enabled);
UVM_ASSERT(!gpu1->parent->smc.enabled);
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);
@ -2638,9 +2713,9 @@ static void disable_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
// IDs queried from the peer table above which are about to be removed from
// the global table.
if (gpu0->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu0);
uvm_parent_gpu_access_counter_buffer_flush(gpu0->parent);
if (gpu1->parent->access_counters_supported)
uvm_gpu_access_counter_buffer_flush(gpu1);
uvm_parent_gpu_access_counter_buffer_flush(gpu1->parent);
memset(peer_caps, 0, sizeof(*peer_caps));
}
@ -2668,12 +2743,17 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static uvm_aperture_t uvm_gpu_peer_caps_aperture(uvm_gpu_peer_t *peer_caps, uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu)
{
size_t peer_index;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
// Indirect peers are accessed as sysmem addresses
if (peer_caps->is_indirect_peer)
return UVM_APERTURE_SYS;
// MIG instances in the same physical GPU have vidmem addresses
if (local_gpu->parent == remote_gpu->parent)
return UVM_APERTURE_VID;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id))
peer_index = 0;
else
@ -3285,12 +3365,19 @@ NV_STATUS uvm_api_register_gpu(UVM_REGISTER_GPU_PARAMS *params, struct file *fil
.user_client = params->hClient,
.user_object = params->hSmcPartRef,
};
NvProcessorUuid gpu_instance_uuid;
NV_STATUS status;
return uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId);
status = uvm_va_space_register_gpu(va_space,
&params->gpu_uuid,
&user_rm_va_space,
&params->numaEnabled,
&params->numaNodeId,
&gpu_instance_uuid);
if (status == NV_OK)
uvm_uuid_copy(&params->gpu_uuid, &gpu_instance_uuid);
return status;
}
NV_STATUS uvm_api_unregister_gpu(UVM_UNREGISTER_GPU_PARAMS *params, struct file *filp)
@ -3363,10 +3450,10 @@ NV_STATUS uvm_test_set_prefetch_filtering(UVM_TEST_SET_PREFETCH_FILTERING_PARAMS
switch (params->filtering_mode) {
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_ALL:
uvm_gpu_disable_prefetch_faults(gpu->parent);
uvm_parent_gpu_disable_prefetch_faults(gpu->parent);
break;
case UVM_TEST_PREFETCH_FILTERING_MODE_FILTER_NONE:
uvm_gpu_enable_prefetch_faults(gpu->parent);
uvm_parent_gpu_enable_prefetch_faults(gpu->parent);
break;
default:
status = NV_ERR_INVALID_ARGUMENT;

View File

@ -618,9 +618,10 @@ struct uvm_gpu_struct
// The gpu's GI uuid if SMC is enabled; otherwise, a copy of parent->uuid.
NvProcessorUuid uuid;
// Nice printable name in the format: ID: 999: UVM-GPU-<parent_uuid>.
// Nice printable name in the format:
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
char name[9 + UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
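The doubled buffer above must now hold both the physical GPU UUID and the GPU instance UUID. As a quick illustration of why 9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH is exactly enough, here is a small stand-alone sketch; UUID_TEXT_LEN and the two UUID strings are placeholders for illustration, not values taken from the driver.

```c
/* Stand-alone sketch of the sizing above; UUID_TEXT_LEN and the UUID
 * strings are illustrative stand-ins, not values from this header. */
#include <stdio.h>

#define UUID_TEXT_LEN 64   /* stand-in for UVM_GPU_UUID_TEXT_BUFFER_LENGTH,
                              which already counts a trailing NUL */

int main(void)
{
    /* "ID: 999: " is 9 characters. Each printed UUID needs at most
     * UUID_TEXT_LEN - 1 characters; one of the two counted NULs pays for
     * the separating space and the other terminates the string, so
     * 9 + 2 * UUID_TEXT_LEN is exactly enough. */
    char name[9 + 2 * UUID_TEXT_LEN];
    const char *parent_uuid = "GPU-00000000-1111-2222-3333-444444444444";
    const char *gi_uuid     = "UVM-GI-55555555-6666-7777-8888-999999999999";

    snprintf(name, sizeof(name), "ID: %u: %s %s", 999u, parent_uuid, gi_uuid);
    printf("%s\n", name);
    return 0;
}
```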
// Refcount of the gpu, i.e. how many times it has been retained. This is
// roughly a count of how many times it has been registered with a VA space,
@ -656,6 +657,10 @@ struct uvm_gpu_struct
// can allocate through PMM (PMA).
NvU64 max_allocatable_address;
// Max supported vidmem page size may be smaller than the max GMMU page
// size because of the page sizes supported by the vMMU.
NvU64 max_vidmem_page_size;
struct
{
// True if the platform supports HW coherence and the GPU's memory
@ -844,6 +849,9 @@ struct uvm_gpu_struct
struct proc_dir_entry *dir_symlink;
// The GPU instance UUID symlink if SMC is enabled.
struct proc_dir_entry *gpu_instance_uuid_symlink;
struct proc_dir_entry *info_file;
struct proc_dir_entry *dir_peers;
@ -1210,11 +1218,6 @@ static const char *uvm_gpu_name(uvm_gpu_t *gpu)
return gpu->name;
}
static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
{
return &gpu->parent->uuid;
}
static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
{
if (gpu->parent->smc.enabled)
@ -1234,6 +1237,9 @@ struct uvm_gpu_peer_struct
// - The global lock is held.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be SMC peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
// to be NVLINK peers and were both retained.
//
// - While the global lock was held in the past, the two GPUs were detected
@ -1319,17 +1325,17 @@ static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struc
// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
// a circular dep between global and gpu modules.
// Get a uvm_gpu_t by UUID. This returns NULL if the GPU is not present. This
// is the general purpose call that should be used normally.
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
// instead.
// Get a uvm_gpu_t by UUID (physical GPU UUID if SMC is not enabled, otherwise
// GPU instance UUID).
// This returns NULL if the GPU is not present.
// This is the general purpose call that should be used normally.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Get a uvm_parent_gpu_t by UUID. Like uvm_gpu_get_by_uuid(), this function
// returns NULL if the GPU has not been registered.
// Get a uvm_parent_gpu_t by UUID (physical GPU UUID).
// Like uvm_gpu_get_by_uuid(), this function returns NULL if the GPU has not
// been registered.
//
// LOCKING: requires the global lock to be held
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
@ -1340,13 +1346,6 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
// the partition hasn't been registered. This call needs to be used instead of
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);
// Retain a gpu by uuid
// Returns the retained uvm_gpu_t in gpu_out on success
//

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,7 +33,7 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
@ -99,7 +99,8 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
"Valid values: [1, 65535]");
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode);
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode);
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
@ -126,7 +127,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all systems except P9.
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@ -149,7 +150,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
if (UVM_ATS_IBM_SUPPORTED())
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;
return false;
@ -281,7 +282,7 @@ get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm
&(access_counters)->current_config.momc;
}
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
@ -340,7 +341,7 @@ static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *confi
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
@ -444,12 +445,12 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
return status;
}
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
@ -475,7 +476,7 @@ void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
batch_context->phys.translations = NULL;
}
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
{
if (!parent_gpu->access_counters_supported)
return false;
@ -518,7 +519,7 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
// taken control of the notify buffer since the GPU was initialized. Then
// flush old notifications. This will update the cached_put pointer.
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counters->current_config.threshold = config->threshold;
@ -537,20 +538,20 @@ error:
// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_gpu_t *gpu)
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
&access_counters->rm_info));
UVM_ASSERT(status == NV_OK);
}
@ -579,14 +580,14 @@ static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConf
// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void gpu_access_counters_disable(uvm_gpu_t *gpu)
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
if (--gpu->parent->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(gpu);
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu);
}
// Invoked during registration of the GPU in the VA space
@ -598,7 +599,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
if (uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_ERR_INVALID_DEVICE;
}
else {
@ -616,7 +617,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
// modified to protect from concurrent enablement of access counters in
// another GPU
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
// If this is the first reference taken on access counters, dropping the
@ -626,22 +627,24 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
return status;
}
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *va_space)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (uvm_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id)) {
gpu_access_counters_disable(gpu);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
parent_gpu->id)) {
parent_gpu_access_counters_disable(parent_gpu);
// If this VA space reconfigured access counters, clear the
// ownership to allow other processes to invoke the reconfiguration
if (gpu->parent->access_counter_buffer_info.reconfiguration_owner == va_space)
gpu->parent->access_counter_buffer_info.reconfiguration_owner = NULL;
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
@ -660,15 +663,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_flush_mode_t flush_mode)
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@ -680,28 +684,28 @@ static void access_counter_buffer_flush_locked(uvm_gpu_t *gpu, uvm_gpu_buffer_fl
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin);
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
gpu->parent->access_counter_buffer_hal->entry_clear_valid(gpu->parent, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}
write_get(gpu->parent, get);
write_get(parent_gpu, get);
}
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu)
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(parent_gpu->access_counters_supported);
// Disables access counter interrupts and notification servicing
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (gpu->parent->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
@ -1027,7 +1031,7 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
if (!iter.migratable)
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, address, processor);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// If the page is throttling, ignore the access counter
// notification
@ -1212,7 +1216,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
service_context->block_context->mm = mm;
uvm_va_block_context_init(service_context->block_context, mm);
if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
@ -1221,7 +1226,8 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
@ -1506,8 +1512,6 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
uvm_va_block_context_init(service_context->block_context, mm);
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
@ -1519,6 +1523,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
{
@ -1546,7 +1551,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
page_index = uvm_va_block_cpu_page_index(va_block, addr);
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);
// resident_id might be invalid or might already be the same as the GPU
// which received the notification if the memory was already migrated before
@ -1602,6 +1607,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
UVM_ASSERT(va_block);
UVM_ASSERT(index < batch_context->virt.num_notifications);
@ -1610,16 +1616,24 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_page_mask_zero(accessed_pages);
uvm_va_block_context_init(service_context->block_context, mm);
uvm_mutex_lock(&va_block->lock);
for (i = index; i < batch_context->virt.num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
else
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
expand_notification_block(gpu_va_space,
va_block,
batch_context->block_service_context.block_context,
accessed_pages,
current_entry);
}
else {
break;
}
}
*out_index = i;
@ -1698,6 +1712,9 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
// location is set
// If no pages were actually migrated, don't clear the access counters.
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
if (status != NV_OK)
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
@ -1985,7 +2002,7 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
if (!gpu)
return NV_ERR_INVALID_DEVICE;
params->enabled = uvm_gpu_access_counters_required(gpu->parent);
params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);
uvm_gpu_release(gpu);
@ -2050,11 +2067,11 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_isr_unlock;
}
if (!uvm_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->id)) {
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, &config);
if (status == NV_OK)
uvm_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->id);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
else
goto exit_isr_unlock;
}
@ -2066,7 +2083,7 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
// enabled in at least gpu. This inconsistent state is not visible to other
// threads or VA spaces because of the ISR lock, and it is immediately
// rectified by retaking ownership.
access_counters_yield_ownership(gpu);
access_counters_yield_ownership(gpu->parent);
status = access_counters_take_ownership(gpu, &config);
// Retaking ownership failed, so RM owns the interrupt.
@ -2080,8 +2097,8 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}
uvm_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->id);
gpu_access_counters_disable(gpu);
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
parent_gpu_access_counters_disable(gpu->parent);
goto exit_isr_unlock;
}
@ -2167,42 +2184,42 @@ exit_release_gpu:
return status;
}
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore)
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
bool change_intr_state = false;
if (!gpu->parent->access_counters_supported)
if (!parent_gpu->access_counters_supported)
return;
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
if (do_ignore) {
if (gpu->parent->access_counter_buffer_info.notifications_ignored_count++ == 0)
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
change_intr_state = true;
}
else {
UVM_ASSERT(gpu->parent->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--gpu->parent->access_counter_buffer_info.notifications_ignored_count == 0)
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
change_intr_state = true;
}
if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications. We
// just disable the interrupt.
uvm_spin_lock_irqsave(&gpu->parent->isr.interrupts_lock);
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
if (do_ignore)
uvm_parent_gpu_access_counters_intr_disable(gpu->parent);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
else
uvm_parent_gpu_access_counters_intr_enable(gpu->parent);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_spin_unlock_irqrestore(&gpu->parent->isr.interrupts_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
if (!do_ignore)
access_counter_buffer_flush_locked(gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
@ -2216,7 +2233,7 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
return NV_ERR_INVALID_DEVICE;
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_set_ignore(gpu, params->ignore);
uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
else
status = NV_ERR_NOT_SUPPORTED;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,13 +27,13 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_access_counters(uvm_gpu_t *gpu);
void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
// Ignore or unignore access counters notifications. Ignoring means that the
// bottom half is a no-op which just leaves notifications in the HW buffer
@ -46,7 +46,7 @@ void uvm_gpu_access_counter_buffer_flush(uvm_gpu_t *gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
void uvm_gpu_access_counters_set_ignore(uvm_gpu_t *gpu, bool do_ignore);
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
@ -63,7 +63,7 @@ void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
// Check whether access counters should be enabled when the given GPU is
// registered on any VA space.
bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// Functions used to enable/disable access counters on a GPU in the given VA
// space.
@ -72,12 +72,12 @@ bool uvm_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu);
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_gpu_access_counters_disable
// uvm_parent_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -100,7 +100,7 @@ static unsigned schedule_replayable_faults_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.replayable_faults.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_replayable_faults_pending(parent_gpu)) {
if (!uvm_parent_gpu_replayable_faults_pending(parent_gpu)) {
up(&parent_gpu->isr.replayable_faults.service_lock.sem);
return 0;
}
@ -137,7 +137,7 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
// interrupts will be triggered by the gpu and faults may stay
// unserviced. Therefore, if there is a fault in the queue, we schedule
// a bottom half unconditionally.
if (!uvm_gpu_non_replayable_faults_pending(parent_gpu))
if (!uvm_parent_gpu_non_replayable_faults_pending(parent_gpu))
return 0;
nv_kref_get(&parent_gpu->gpu_kref);
@ -167,7 +167,7 @@ static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
return 0;
if (!uvm_gpu_access_counters_pending(parent_gpu)) {
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
return 0;
}
@ -295,7 +295,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
uvm_va_block_context_t *block_context;
if (parent_gpu->replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU fault buffer: %s, GPU: %s\n",
nvstatusToString(status),
@ -361,7 +361,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_gpu_init_access_counters(parent_gpu);
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
@ -423,7 +423,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
// bottom half never take the global lock, since we're holding it here.
//
// Note that it's safe to call nv_kthread_q_stop() even if
// nv_kthread_q_init() failed in uvm_gpu_init_isr().
// nv_kthread_q_init() failed in uvm_parent_gpu_init_isr().
nv_kthread_q_stop(&parent_gpu->isr.bottom_half_q);
nv_kthread_q_stop(&parent_gpu->isr.kill_channel_q);
}
@ -438,8 +438,8 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
// replayable_faults.disable_intr_ref_count since they must retain the
// GPU across uvm_parent_gpu_replayable_faults_isr_lock/
// uvm_parent_gpu_replayable_faults_isr_unlock. This means the
// uvm_gpu_replayable_faults_disable_intr above could only have raced
// with bottom halves.
// uvm_parent_gpu_replayable_faults_disable_intr above could only have
// raced with bottom halves.
//
// If we cleared replayable_faults.handling before the bottom half got
// to its uvm_parent_gpu_replayable_faults_isr_unlock, when it
@ -455,13 +455,13 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
uvm_parent_gpu_name(parent_gpu),
parent_gpu->isr.replayable_faults.disable_intr_ref_count);
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2023 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -116,8 +116,8 @@
// There is no error handling in this function. The caller is in charge of
// calling uvm_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@ -145,7 +145,7 @@ NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *pare
return NV_OK;
}
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
@ -163,7 +163,7 @@ void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_
non_replayable_faults->fault_cache = NULL;
}
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvBool has_pending_faults;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017 NVIDIA Corporation
Copyright (c) 2017-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -26,12 +26,12 @@
#include <nvstatus.h>
#include "uvm_forward_decl.h"
bool uvm_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_service_non_replayable_fault_buffer(uvm_gpu_t *gpu);
NV_STATUS uvm_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_NON_REPLAYABLE_FAULTS_H__

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -44,6 +44,24 @@
// provides some background for understanding replayable faults, non-replayable
// faults, and how UVM services each fault type.
// The HW fault buffer flush mode instructs RM on how to flush the hardware
// replayable fault buffer; it is only used in Confidential Computing.
//
// Unless HW_FAULT_BUFFER_FLUSH_MODE_MOVE is functionally required (because UVM
// needs to inspect the faults currently present in the HW fault buffer) it is
// recommended to use HW_FAULT_BUFFER_FLUSH_MODE_DISCARD for performance
// reasons.
typedef enum
{
// Flush the HW fault buffer, discarding all the resulting faults. UVM never
// gets to see these faults.
HW_FAULT_BUFFER_FLUSH_MODE_DISCARD,
// Flush the HW fault buffer, and move all the resulting faults to the SW
// fault ("shadow") buffer.
HW_FAULT_BUFFER_FLUSH_MODE_MOVE,
} hw_fault_buffer_flush_mode_t;
#define UVM_PERF_REENABLE_PREFETCH_FAULTS_LAPSE_MSEC_DEFAULT 1000
// Lapse of time in milliseconds after which prefetch faults can be re-enabled.
@ -226,7 +244,7 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
batch_context->utlbs = NULL;
}
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@ -253,7 +271,7 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
goto fail;
if (parent_gpu->non_replayable_faults_supported) {
status = uvm_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
status = uvm_parent_gpu_fault_buffer_init_non_replayable_faults(parent_gpu);
if (status != NV_OK)
goto fail;
}
@ -261,28 +279,28 @@ NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_gpu_fault_buffer_deinit(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit(parent_gpu);
return status;
}
// Reinitialize state relevant to replayable fault handling after returning
// from a power management cycle.
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->replayable_faults_supported);
fault_buffer_reinit_replayable_faults(parent_gpu);
}
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
if (parent_gpu->non_replayable_faults_supported)
uvm_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(parent_gpu);
fault_buffer_deinit_replayable_faults(parent_gpu);
@ -297,7 +315,7 @@ void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
}
}
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
@ -533,25 +551,26 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);
}
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu)
// In Confidential Computing GSP-RM owns the HW replayable fault buffer.
// Flushing the fault buffer implies flushing both the HW buffer (using a RM
// API), and the SW buffer accessible by UVM ("shadow" buffer).
//
// The HW buffer needs to be flushed first. This is because, once that flush
// completes, any faults that were present in the HW buffer have been moved to
// the shadow buffer, or have been discarded by RM.
static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_fault_buffer_flush_mode_t flush_mode)
{
NV_STATUS status = NV_OK;
NV_STATUS status;
NvBool is_flush_mode_move;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
UVM_ASSERT((flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE) || (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_DISCARD));
// When Confidential Computing is enabled, GSP-RM owns the HW replayable
// fault buffer. Flushing the fault buffer implies flushing both the HW
// buffer (using a RM API), and the SW buffer accessible by UVM ("shadow"
// buffer).
//
// The HW buffer needs to be flushed first. This is because, once that
// flush completes, any faults that were present in the HW buffer when
// fault_buffer_flush_locked is called, are now either flushed from the HW
// buffer, or are present in the shadow buffer and are about to be discarded
// too.
if (!g_uvm_global.conf_computing_enabled)
return NV_OK;
// Flush the HW replayable buffer owned by GSP-RM.
status = nvUvmInterfaceFlushReplayableFaultBuffer(parent_gpu->rm_device);
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@ -595,10 +614,9 @@ static NV_STATUS fault_buffer_flush_locked(uvm_gpu_t *gpu,
// Read PUT pointer from the GPU if requested
if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT || flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT) {
status = hw_fault_buffer_flush_locked(parent_gpu);
status = hw_fault_buffer_flush_locked(parent_gpu, HW_FAULT_BUFFER_FLUSH_MODE_DISCARD);
if (status != NV_OK)
return status;
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
}
@ -1435,7 +1453,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_fault_access_type_to_prot(service_access_type)))
continue;
thrashing_hint = uvm_perf_thrashing_get_hint(va_block, current_entry->fault_address, gpu->id);
thrashing_hint = uvm_perf_thrashing_get_hint(va_block,
block_context->block_context,
current_entry->fault_address,
gpu->id);
if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
// Throttling is implemented by sleeping in the fault handler on
// the CPU and by continuing to process faults on other pages on
@ -1981,7 +2002,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_servic
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
status = hw_fault_buffer_flush_locked(gpu->parent);
status = hw_fault_buffer_flush_locked(gpu->parent, HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
if (status != NV_OK)
goto done;
@ -2738,14 +2759,14 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu, uvm_fau
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_gpu_disable_prefetch_faults(parent_gpu);
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
uvm_gpu_enable_prefetch_faults(parent_gpu);
uvm_parent_gpu_enable_prefetch_faults(parent_gpu);
}
}
@ -2872,7 +2893,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
UVM_DBG_PRINT("Error servicing replayable faults on GPU: %s\n", uvm_gpu_name(gpu));
}
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2883,7 +2904,7 @@ void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
}
}
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
@ -2940,7 +2961,7 @@ NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARA
do {
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
pending = uvm_gpu_replayable_faults_pending(gpu->parent);
pending = uvm_parent_gpu_replayable_faults_pending(gpu->parent);
uvm_parent_gpu_replayable_faults_isr_unlock(gpu->parent);
if (!pending)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -52,12 +52,12 @@ typedef enum
const char *uvm_perf_fault_replay_policy_string(uvm_perf_fault_replay_policy_t fault_replay);
NV_STATUS uvm_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_fault_buffer_resume(uvm_parent_gpu_t *parent_gpu);
bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
// Clear valid bit for all remaining unserviced faults in the buffer, set GET to
// PUT, and push a fault replay of type UVM_FAULT_REPLAY_TYPE_START. It does not
@ -68,8 +68,8 @@ bool uvm_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_gpu_fault_buffer_flush(uvm_gpu_t *gpu);
// Enable/disable HW support for prefetch-initiated faults
void uvm_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
// Service pending replayable faults on the given GPU. This function must be
// only called from the ISR bottom half

View File

@ -1306,7 +1306,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
uvm_va_policy_node_t *node;
uvm_va_block_region_t region;
uvm_processor_mask_t map_processors;
uvm_processor_mask_t *map_processors = &block_context->hmm.map_processors_eviction;
uvm_processor_id_t id;
NV_STATUS tracker_status;
NV_STATUS status = NV_OK;
@ -1333,9 +1333,9 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
// Exclude the processors that have already been mapped due to
// AccessedBy.
uvm_processor_mask_andnot(&map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
for_each_gpu_id_in_mask(id, &map_processors) {
for_each_gpu_id_in_mask(id, map_processors) {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
uvm_va_block_gpu_state_t *gpu_state;
@ -1866,7 +1866,7 @@ static void lock_block_cpu_page(uvm_va_block_t *va_block,
unsigned long *dst_pfns,
uvm_page_mask_t *same_devmem_page_mask)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(va_block, page_to_nid(src_page), page_index);
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_any_chunk_for_page(va_block, page_index);
uvm_va_block_region_t chunk_region;
struct page *dst_page;
@ -2708,7 +2708,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// Since there is a CPU resident page, there shouldn't be one
// anywhere else. TODO: Bug 3660922: Need to handle read
// duplication at some point.
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
// migrate_vma_setup() was able to isolate and lock the page;
// therefore, it is CPU resident and not mapped.
@ -2725,8 +2727,9 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
// used for GPU to GPU copies. It can't be an evicted page because
// migrate_vma_setup() would have found a source page.
if (uvm_page_mask_test(&va_block->cpu.allocated, page_index)) {
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block, page_index));
UVM_ASSERT(!uvm_va_block_page_resident_processors_count(va_block,
service_context->block_context,
page_index));
hmm_va_block_cpu_page_unpopulate(va_block, page_index, NULL);
}
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2013-2019 NVidia Corporation
Copyright (c) 2013-2023 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -320,7 +320,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // IN
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
NvU32 numGpus; // IN
NvU64 serverId NV_ALIGN_BYTES(8); // OUT
NV_STATUS rmStatus; // OUT
@ -344,9 +344,9 @@ typedef struct
typedef struct
{
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
NvU32 validCount; // OUT
NV_STATUS rmStatus; // OUT
} UVM_GET_GPU_UUID_TABLE_PARAMS;
#if defined(WIN32) || defined(WIN64)
@ -494,7 +494,7 @@ typedef struct
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
NvU64 offset NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NvS32 rmCtrlFd; // IN
NvU32 hClient; // IN
@ -552,7 +552,7 @@ typedef struct
typedef struct
{
NvProcessorUuid gpu_uuid; // IN
NvProcessorUuid gpu_uuid; // IN/OUT
NvBool numaEnabled; // OUT
NvS32 numaNodeId; // OUT
NvS32 rmCtrlFd; // IN
@ -835,7 +835,14 @@ typedef struct
//
// Initialize any tracker object such as a queue or counter
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters, UvmToolsCreateProcessorCounters
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the order of structure elements has the version as the last field.
// This is used to tell whether the kernel supports V2 events or not because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
typedef struct
@ -847,6 +854,8 @@ typedef struct
NvU32 allProcessors; // IN
NvU32 uvmFd; // IN
NV_STATUS rmStatus; // OUT
NvU32 requestedVersion; // IN
NvU32 grantedVersion; // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
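To make the version-detection trick described above concrete, here is a hedged user-space sketch of the handshake. The device path, the header name, the zeroed queue fields, and the literal version numbers are assumptions for illustration, not taken from this header.

```c
/* Hedged user-space sketch of the handshake described above. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

#include "uvm_ioctl.h"  /* assumed name of the header diffed here */

int main(void)
{
    UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS params;
    int fd = open("/dev/nvidia-uvm-tools", O_RDWR);  /* assumed device node */

    if (fd < 0)
        return 1;

    /* A real caller must also fill in the queue buffer fields (omitted). */
    memset(&params, 0, sizeof(params));
    params.requestedVersion = 2;  /* ask for V2 events */

    if (ioctl(fd, UVM_TOOLS_INIT_EVENT_TRACKER, &params) == 0) {
        /* A V1 kernel never reads or writes the trailing fields, so a
         * grantedVersion still equal to 0 means only V1 events exist. */
        if (params.grantedVersion == 0)
            printf("kernel predates versioned event trackers (V1 only)\n");
        else
            printf("kernel granted event version %u\n",
                   (unsigned)params.grantedVersion);
    }
    return 0;
}
```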
//
@ -927,6 +936,12 @@ typedef struct
//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present in order to dynamically allocate a correctly sized array, since the
// total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
typedef struct
@ -934,6 +949,7 @@ typedef struct
NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
NvU32 count; // IN/OUT
NV_STATUS rmStatus; // OUT
NvU32 version; // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
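The query pattern noted above (tablePtr == 0 and count == 0 to learn the processor count) lends itself to a two-call sequence. A hedged sketch follows; the device path and header name are assumptions, and error handling is intentionally minimal.

```c
/* Hedged sketch of the two-call pattern: query the processor count first,
 * then allocate a table of that size and fetch it. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "uvm_ioctl.h"  /* assumed name of the header diffed here */

int main(void)
{
    UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS params;
    NvProcessorUuid *table;
    int fd = open("/dev/nvidia-uvm-tools", O_RDWR);  /* assumed device node */

    if (fd < 0)
        return 1;

    /* tablePtr == 0 and count == 0: ask how many processors are present. */
    memset(&params, 0, sizeof(params));
    if (ioctl(fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0)
        return 1;

    /* 'count' now holds the total; keep it non-zero and point tablePtr at
     * an array of that size to get the full (V2) table back. */
    table = calloc(params.count, sizeof(*table));
    if (!table)
        return 1;
    params.tablePtr = (NvU64)(uintptr_t)table;

    if (ioctl(fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) == 0)
        printf("got %u processor UUIDs (event version %u)\n",
               (unsigned)params.count, (unsigned)params.version);

    free(table);
    return 0;
}
```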
@ -979,7 +995,7 @@ typedef struct
{
NvU64 base NV_ALIGN_BYTES(8); // IN
NvU64 length NV_ALIGN_BYTES(8); // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS]; // IN
UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

View File

@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define UVM_IS_CONFIG_HMM() 0
#endif
// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
#else
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
#endif
// Various issues prevent us from using mmu_notifiers in older kernels. These
// include:
// - ->release being called under RCU instead of SRCU: fixed by commit

View File

@ -633,8 +633,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
uvm_gpu_t *mapping_gpu,
const UvmGpuMemoryInfo *mem_info)
{
uvm_gpu_t *owning_gpu = NULL;
uvm_gpu_t *gpu;
uvm_gpu_t *owning_gpu;
if (mem_info->egm)
UVM_ASSERT(mem_info->sysmem);
@ -653,16 +652,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
// registered.
// This also checks for if EGM owning GPU is registered.
// TODO: Bug 4351121: RM will return the GI UUID, but
// uvm_va_space_get_gpu_by_uuid() currently matches on physical GPU UUIDs.
// Match on GI UUID until the UVM user level API has been updated to use
// the GI UUID.
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(&gpu->uuid, &mem_info->uuid)) {
owning_gpu = gpu;
break;
}
}
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
if (!owning_gpu)
return NV_ERR_INVALID_DEVICE;
@ -954,6 +944,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
goto error;
}
// Check for the maximum page size for the mapping of vidmem allocations, as
// the vMMU segment size may limit the range of page sizes.
if (!ext_gpu_map->is_sysmem && (ext_gpu_map->gpu == ext_gpu_map->owning_gpu) &&
(mapping_page_size > mapping_gpu->mem_info.max_vidmem_page_size))
mapping_page_size = mapping_gpu->mem_info.max_vidmem_page_size;
mem_info.pageSize = mapping_page_size;
status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
@ -989,7 +985,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
if (uvm_api_range_invalid_4k(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
uvm_va_space_down_read_rm(va_space);

View File

@ -86,7 +86,7 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
// Only map those pages that are not already mapped on destination
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
prot = uvm_va_block_page_compute_highest_permission(va_block, va_block_context, dest_id, page_index);
if (prot == UVM_PROT_NONE)
continue;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -149,6 +149,26 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
return NV_OK;
}
// The aperture may filter the biggest page size:
// - UVM_APERTURE_VID biggest page size on vidmem mappings
// - UVM_APERTURE_SYS biggest page size on sysmem mappings
// - UVM_APERTURE_PEER_0-7 biggest page size on peer mappings
static NvU32 mmu_biggest_page_size(uvm_page_tree_t *tree, uvm_aperture_t aperture)
{
UVM_ASSERT(aperture < UVM_APERTURE_DEFAULT);
// There may be scenarios where the GMMU must use a subset of the supported
// page sizes, e.g., to comply with the vMMU supported page sizes due to
// segmentation sizes.
if (aperture == UVM_APERTURE_VID) {
UVM_ASSERT(tree->gpu->mem_info.max_vidmem_page_size <= NV_U32_MAX);
return (NvU32) tree->gpu->mem_info.max_vidmem_page_size;
}
else {
return 1 << __fls(tree->hal->page_sizes());
}
}
static NV_STATUS phys_mem_allocate_vidmem(uvm_page_tree_t *tree,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
@ -856,7 +876,7 @@ static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
if (!page_tree_ats_init_required(tree))
return NV_OK;
page_size = uvm_mmu_biggest_page_size(tree);
page_size = mmu_biggest_page_size(tree, UVM_APERTURE_VID);
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
@ -1090,6 +1110,8 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
tree->gpu_va_space = gpu_va_space;
tree->big_page_size = big_page_size;
UVM_ASSERT(gpu->mem_info.max_vidmem_page_size & tree->hal->page_sizes());
page_tree_set_location(tree, location);
uvm_tracker_init(&tree->tracker);
@ -2301,7 +2323,7 @@ NV_STATUS create_static_vidmem_mapping(uvm_gpu_t *gpu)
UVM_ASSERT(!uvm_mmu_parent_gpu_needs_dynamic_vidmem_mapping(gpu->parent));
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_VID);
size = UVM_ALIGN_UP(gpu->mem_info.max_allocatable_address + 1, page_size);
UVM_ASSERT(page_size);
@ -2338,9 +2360,9 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
return NV_OK;
page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
aperture = uvm_gpu_peer_aperture(gpu, peer);
page_size = mmu_biggest_page_size(&gpu->address_space_tree, aperture);
size = UVM_ALIGN_UP(peer->mem_info.max_allocatable_address + 1, page_size);
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
phys_offset = 0ULL;
@ -2783,7 +2805,7 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
// sysmem mappings with 128K entries.
UVM_ASSERT(is_power_of_2(mapping_size));
UVM_ASSERT(mapping_size >= UVM_SIZE_1GB);
UVM_ASSERT(mapping_size >= uvm_mmu_biggest_page_size(&gpu->address_space_tree));
UVM_ASSERT(mapping_size >= mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS));
UVM_ASSERT(mapping_size <= flat_sysmem_va_size);
flat_sysmem_va_size = UVM_ALIGN_UP(flat_sysmem_va_size, mapping_size);
@ -2828,7 +2850,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
if (sysmem_mapping->range_vec == NULL) {
uvm_gpu_address_t virtual_address = uvm_parent_gpu_address_virtual_from_sysmem_phys(gpu->parent, curr_pa);
NvU64 phys_offset = curr_pa;
NvU32 page_size = uvm_mmu_biggest_page_size(&gpu->address_space_tree);
NvU32 page_size = mmu_biggest_page_size(&gpu->address_space_tree, UVM_APERTURE_SYS);
uvm_pmm_alloc_flags_t pmm_flags;
// No eviction is requested when allocating the page tree storage,

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -612,6 +612,9 @@ static NvU64 uvm_mmu_pde_coverage(uvm_page_tree_t *tree, NvU32 page_size)
return uvm_mmu_page_tree_entries(tree, depth, page_size) * page_size;
}
// Page sizes supported by the GPU. Use uvm_mmu_biggest_page_size() to retrieve
// the largest page size supported in a given system, which considers the GMMU
// and vMMU page sizes and segment sizes.
static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)
{
UVM_ASSERT_MSG(is_power_of_2(page_size), "0x%x\n", page_size);
@ -642,11 +645,6 @@ static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_pa
return page_size;
}
static NvU32 uvm_mmu_biggest_page_size(uvm_page_tree_t *tree)
{
return 1 << __fls(tree->hal->page_sizes());
}
static NvU32 uvm_mmu_pte_size(uvm_page_tree_t *tree, NvU32 page_size)
{
return tree->hal->entry_size(tree->hal->page_table_depth(page_size));

View File

@ -1442,6 +1442,7 @@ static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_locatio
static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
page_thrashing_info_t *page_thrashing,
uvm_processor_id_t requester)
@ -1460,7 +1461,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, requester);
closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
if (uvm_va_block_is_hmm(va_block)) {
// HMM pages always start out resident on the CPU but may not be
// recorded in the va_block state because hmm_range_fault() or
@ -1601,6 +1602,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
// that case we keep the page pinned while applying the same algorithm as in
// Phase1.
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester)
{
@ -1713,6 +1715,7 @@ uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
else {
hint = get_hint_for_migration_thrashing(va_space_thrashing,
va_block,
va_block_context,
page_index,
page_thrashing,
requester);

View File

@ -74,7 +74,9 @@ typedef struct
} uvm_perf_thrashing_hint_t;
// Obtain a hint to prevent thrashing on the page with given address
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block, NvU64 address,
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
NvU64 address,
uvm_processor_id_t requester);
// Obtain a pointer to a mask with the processors that are thrashing on the

View File

@ -1408,8 +1408,6 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(accessing_gpu, gpu);
uvm_gpu_identity_mapping_t *gpu_peer_mapping;
UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_INVALID);
if (peer_caps->is_indirect_peer ||
(accessing_gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_PHYSICAL)) {
// Indirect peers are accessed as sysmem addresses, so they don't need

View File

@ -1082,6 +1082,7 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
{
uvm_va_range_t *va_range;
uvm_va_block_t *va_block = NULL;
uvm_va_block_context_t *va_block_context = NULL;
NvU32 num_blocks;
NvU32 index = 0;
uvm_gpu_phys_address_t phys_addr = {0};
@ -1099,9 +1100,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
}
TEST_CHECK_RET(va_block);
va_block_context = uvm_va_block_context_alloc(NULL);
TEST_CHECK_RET(va_block_context);
uvm_mutex_lock(&va_block->lock);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, 0, gpu->id), gpu->id);
is_resident = uvm_id_equal(uvm_va_block_page_get_closest_resident(va_block, va_block_context, 0, gpu->id), gpu->id);
if (is_resident) {
phys_addr = uvm_va_block_gpu_phys_page_address(va_block, 0, gpu);
phys_addr.address = UVM_ALIGN_DOWN(phys_addr.address, UVM_VA_BLOCK_SIZE);
@ -1109,6 +1113,8 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
uvm_mutex_unlock(&va_block->lock);
uvm_va_block_context_free(va_block_context);
TEST_CHECK_RET(is_resident);
// Perform the lookup for the whole root chunk

View File

@ -25,6 +25,8 @@
#include "uvm_processors.h"
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
const uvm_processor_mask_t g_uvm_processor_mask_empty = { };
NV_STATUS uvm_processor_mask_cache_init(void)
{

View File

@ -522,6 +522,9 @@ UVM_PROCESSOR_MASK(uvm_processor_mask_t, \
uvm_processor_id_t, \
uvm_id_from_value)
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
@ -567,6 +570,10 @@ void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mas
(uvm_id_value(i) < uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
i = uvm_gpu_id_next(i))
// Helper to iterate over all sub processor indexes.
#define for_each_sub_processor_index(i) \
for (i = 0; i < UVM_PARENT_ID_MAX_SUB_PROCESSORS; i++)
// Helper to iterate over all valid processor ids.
#define for_each_id(i) for (i = UVM_ID_CPU; UVM_ID_IS_VALID(i); i = uvm_id_next(i))

View File

@ -41,15 +41,11 @@
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
{
NvU64 retained_count = 0;
uvm_parent_gpu_t *parent_gpu;
uvm_gpu_t *gpu = NULL;
uvm_mutex_lock(&g_uvm_global.global_lock);
parent_gpu = uvm_parent_gpu_get_by_uuid(&params->gpu_uuid);
if (parent_gpu)
gpu = uvm_gpu_get_by_parent_and_swizz_id(parent_gpu, params->swizz_id);
gpu = uvm_gpu_get_by_uuid(&params->gpu_uuid);
if (gpu != NULL)
retained_count = uvm_gpu_retained_count(gpu);

View File

@ -40,7 +40,6 @@ typedef struct
{
// In params
NvProcessorUuid gpu_uuid;
NvU32 swizz_id;
// Out params
NvU64 ref_count NV_ALIGN_BYTES(8);
NV_STATUS rmStatus;
@ -192,7 +191,7 @@ typedef struct
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
NvProcessorUuid preferred_location; // Out
NvS32 preferred_cpu_nid; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid accessed_by[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 accessed_by_count; // Out
NvU32 type; // Out (UVM_TEST_VA_RANGE_TYPE)
union
@ -505,7 +504,12 @@ typedef struct
typedef struct
{
// In params
UvmEventEntry entry; // contains only NvUxx types
union
{
UvmEventEntry_V1 entry_v1; // contains only NvUxx types
UvmEventEntry_V2 entry_v2; // contains only NvUxx types
};
NvU32 version;
NvU32 count;
// Out param
@ -620,7 +624,7 @@ typedef struct
// Array of processors which have a resident copy of the page containing
// lookup_address.
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid resident_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 resident_on_count; // Out
// If the memory is resident on the CPU, the NUMA node on which the page
@ -631,24 +635,24 @@ typedef struct
// system-page-sized portion of this allocation which contains
// lookup_address is guaranteed to be resident on the corresponding
// processor.
NvU32 resident_physical_size[UVM_MAX_PROCESSORS]; // Out
NvU32 resident_physical_size[UVM_MAX_PROCESSORS_V2]; // Out
// The physical address of the physical allocation backing lookup_address.
NvU64 resident_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvU64 resident_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
// Array of processors which have a virtual mapping covering lookup_address.
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS_V2]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS_V2] NV_ALIGN_BYTES(8); // Out
NvU32 mapped_on_count; // Out
// The size of the virtual mapping covering lookup_address on each
// mapped_on processor.
NvU32 page_size[UVM_MAX_PROCESSORS]; // Out
NvU32 page_size[UVM_MAX_PROCESSORS_V2]; // Out
// Array of processors which have physical memory populated that would back
// lookup_address if it was resident.
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS]; // Out
NvProcessorUuid populated_on[UVM_MAX_PROCESSORS_V2]; // Out
NvU32 populated_on_count; // Out
NV_STATUS rmStatus; // Out

File diff suppressed because it is too large

View File

@ -52,8 +52,19 @@ typedef enum
typedef unsigned long long UvmStream;
#define UVM_MAX_GPUS NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS (UVM_MAX_GPUS + 1)
// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
#define UVM_MAX_GPUS_V1 NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2 (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))
#define UVM_INIT_FLAGS_DISABLE_HMM ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE ((NvU64)0x2)
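
To make the UVM_PROCESSOR_MASK_SIZE arithmetic above concrete, here is a minimal sketch of the same ceiling division; the device and subdevice limits below are placeholders, since the real NV_MAX_DEVICES and NV_MAX_SUBDEVICES values come from the NVIDIA headers and are not restated here.

```c
#include <stdio.h>

/* Placeholder limits for illustration only. */
#define EXAMPLE_MAX_DEVICES      32
#define EXAMPLE_MAX_SUBDEVICES    8

#define EXAMPLE_MAX_GPUS_V2       (EXAMPLE_MAX_DEVICES * EXAMPLE_MAX_SUBDEVICES)
#define EXAMPLE_MAX_PROCESSORS_V2 (EXAMPLE_MAX_GPUS_V2 + 1)               /* + the CPU */
#define EXAMPLE_MASK_WORDS        ((EXAMPLE_MAX_PROCESSORS_V2 + 63) / 64) /* 64 bits per NvU64 */

int main(void)
{
    /* With the placeholder values: 257 processors round up to 5 NvU64 words. */
    printf("processors: %d, mask words: %d\n",
           EXAMPLE_MAX_PROCESSORS_V2, EXAMPLE_MASK_WORDS);
    return 0;
}
```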
@ -152,6 +163,8 @@ typedef enum {
typedef struct
{
// UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
// or the GPU instance UUID of the partition.
NvProcessorUuid gpuUuid;
NvU32 gpuMappingType; // UvmGpuMappingType
NvU32 gpuCachingType; // UvmGpuCachingType
@ -410,7 +423,29 @@ typedef struct
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo;
} UvmEventCpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be 1st argument of this structure. Setting eventType to
// UvmEventTypeMemoryViolation helps to identify event data in a queue.
//
NvU8 eventType;
NvU8 accessType; // read/write violation (UvmEventMemoryAccessType)
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets.
//
NvU16 padding16Bits;
NvS32 nid; // NUMA node ID of faulting CPU
NvU64 address; // faulting address
NvU64 timeStamp; // cpu time when the fault occurred
NvU32 pid; // process id causing the fault
NvU32 threadId; // thread id causing the fault
NvU64 pc; // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;
typedef enum
{
@ -567,7 +602,49 @@ typedef struct
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo;
} UvmEventMigrationInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure. Setting eventType
// to UvmEventTypeMigration helps to identify event data in a queue.
//
NvU8 eventType;
//
// Cause that triggered the migration
//
NvU8 migrationCause;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16Bits;
//
// Indices are used for the source and destination of migration instead of
// using gpu uuid/cpu id. This reduces the size of each event. The index to
// gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
// Currently we do not distinguish between CPUs so they all use index 0.
//
NvU16 srcIndex; // source CPU/GPU index
NvU16 dstIndex; // destination CPU/GPU index
NvS32 srcNid; // source CPU NUMA node ID
NvS32 dstNid; // destination CPU NUMA node ID
NvU64 address; // base virtual addr used for migration
NvU64 migratedBytes; // number of bytes migrated
NvU64 beginTimeStamp; // cpu time stamp when the memory transfer
// was queued on the gpu
NvU64 endTimeStamp; // cpu time stamp when the memory transfer
// finalization was communicated to the cpu
// For asynchronous operations this field
// will be zero
NvU64 rangeGroupId; // range group tied with this migration
NvU64 beginTimeStampGpu; // time stamp when the migration started
// on the gpu
NvU64 endTimeStampGpu; // time stamp when the migration finished
// on the gpu
} UvmEventMigrationInfo_V2;
typedef enum
{
@ -633,7 +710,64 @@ typedef struct
//
NvU8 padding8Bits;
NvU16 padding16Bits;
} UvmEventGpuFaultInfo;
} UvmEventGpuFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFault helps to identify event data in
// a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_1;
union
{
NvU16 gpcId; // If this is a replayable fault, this field contains
// the physical GPC index where the fault was
// triggered
NvU16 channelId; // If this is a non-replayable fault, this field
// contains the id of the channel that launched the
// operation that caused the fault.
//
// TODO: Bug 3283289: this field is ambiguous for
// Ampere+ GPUs, but it is never consumed by clients.
};
NvU16 clientId; // Id of the MMU client that triggered the fault. This
// is the value provided by HW and is architecture-
// specific. There are separate client ids for
// different client types (See dev_fault.h).
NvU64 address; // virtual address at which gpu faulted
NvU64 timeStamp; // time stamp when the cpu started processing the
// fault
NvU64 timeStampGpu; // gpu time stamp when the fault entry was written
// in the fault buffer
NvU32 batchId; // Per-GPU unique id to identify the faults serviced
// in batch before:
// - Issuing a replay for replayable faults
// - Re-scheduling the channel for non-replayable
// faults.
NvU8 clientType; // Volta+ GPUs can fault on clients other than GR.
// UvmEventFaultClientTypeGpc indicates replayable
// fault, while UvmEventFaultClientTypeHub indicates
// non-replayable fault.
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8Bits_2;
NvU16 gpuIndex; // GPU that experienced the fault
} UvmEventGpuFaultInfo_V2;
//------------------------------------------------------------------------------
// This info is provided when a gpu fault is replayed (for replayable faults)
@ -666,7 +800,25 @@ typedef struct
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo;
} UvmEventGpuFaultReplayInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
// data in a queue.
//
NvU8 eventType;
NvU8 clientType; // See clientType in UvmEventGpuFaultInfo
NvU16 gpuIndex; // GPU that experienced the fault
NvU32 batchId; // Per-GPU unique id to identify the faults that
// have been serviced in batch
NvU64 timeStamp; // cpu time when the replay of the faulting memory
// accesses is queued on the gpu
NvU64 timeStampGpu; // gpu time stamp when the replay operation finished
// executing on the gpu
} UvmEventGpuFaultReplayInfo_V2;
//------------------------------------------------------------------------------
// This info is provided per fatal fault
@ -689,7 +841,26 @@ typedef struct
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo;
} UvmEventFatalFaultInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeFatalFault helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 faultType; // type of gpu fault, refer UvmEventFaultType. Only
// valid if processorIndex is a GPU
NvU8 accessType; // memory access type, refer UvmEventMemoryAccessType
NvU8 reason; // reason why the fault is fatal, refer
// UvmEventFatalReason
NvU16 processorIndex; // processor that experienced the fault
NvU16 padding16bits;
NvU64 address; // virtual address at which the processor faulted
NvU64 timeStamp; // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V2;
typedef struct
{
@ -718,7 +889,38 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInfo;
} UvmEventReadDuplicateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicate helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size in bytes of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region becomes
// read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies in which processors this
// memory region is read-duplicated. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventReadDuplicateInfo_V2;
typedef struct
{
@ -728,13 +930,13 @@ typedef struct
// identify event data in a queue.
//
NvU8 eventType;
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 residentIndex; // index of the cpu/gpu that now contains the only
// valid copy of the memory region
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
@ -746,8 +948,34 @@ typedef struct
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo;
} UvmEventReadDuplicateInvalidateInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to
// identify event data in a queue.
//
NvU8 eventType;
NvU8 padding8bits;
NvU16 residentIndex;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// read-duplicated
NvU64 size; // size of the memory region that is
// read-duplicated
NvU64 timeStamp; // cpu time stamp when the memory region is no
// longer read-duplicate. Since many processors can
// participate in read-duplicate this is time stamp
// when all the operations have been pushed to all
// the processors.
} UvmEventReadDuplicateInvalidateInfo_V2;
typedef struct
{
@ -770,7 +998,30 @@ typedef struct
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo;
} UvmEventPageSizeChangeInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypePageSizeChange helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 processorIndex; // cpu/gpu processor index for which the page size
// changed
NvU32 size; // new page size
NvU64 address; // virtual address of the page whose size has
// changed
NvU64 timeStamp; // cpu time stamp when the new page size is
// queued on the gpu
} UvmEventPageSizeChangeInfo_V2;
typedef struct
{
@ -794,7 +1045,33 @@ typedef struct
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
} UvmEventThrashingDetectedInfo;
} UvmEventThrashingDetectedInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrashingDetected helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU32 padding32bits;
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when thrashing is detected
NvU64 processors[UVM_PROCESSOR_MASK_SIZE];
// mask that specifies which processors are
// fighting for this memory region. This is last
// so UVM_PROCESSOR_MASK_SIZE can grow.
} UvmEventThrashingDetectedInfo_V2;
typedef struct
{
@ -815,7 +1092,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo;
} UvmEventThrottlingStartInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingStart helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu start time stamp for the throttling operation
} UvmEventThrottlingStartInfo_V2;
typedef struct
{
@ -836,7 +1134,28 @@ typedef struct
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo;
} UvmEventThrottlingEndInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeThrottlingEnd helps to identify event
// data in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits[2];
NvU16 processorIndex; // index of the cpu/gpu that was throttled
NvU64 address; // address of the page whose servicing is being
// throttled
NvU64 timeStamp; // cpu end time stamp for the throttling operation
} UvmEventThrottlingEndInfo_V2;
typedef enum
{
@ -892,7 +1211,36 @@ typedef struct
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo;
} UvmEventMapRemoteInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeMapRemote helps to identify event data
// in a queue.
//
NvU8 eventType;
NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells
// the cause for the page to be mapped remotely
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu being remapped
NvU16 dstIndex; // index of the cpu/gpu memory that contains the
// memory region data
NvU64 address; // virtual address of the memory region that is
// thrashing
NvU64 size; // size of the memory region that is thrashing
NvU64 timeStamp; // cpu time stamp when all the required operations
// have been pushed to the processor
NvU64 timeStampGpu; // time stamp when the new mapping is effective in
// the processor specified by srcIndex. If srcIndex
// is a cpu, this field will be zero.
} UvmEventMapRemoteInfo_V2;
typedef struct
{
@ -918,7 +1266,33 @@ typedef struct
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo;
} UvmEventEvictionInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeEviction helps to identify event data
// in a queue.
//
NvU8 eventType;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the cpu/gpu from which data is being
// evicted
NvU16 dstIndex; // index of the cpu/gpu memory to which data is
// going to be stored
NvU64 addressOut; // virtual address of the memory region that is
// being evicted
NvU64 addressIn; // virtual address that caused the eviction
NvU64 size; // size of the memory region that being evicted
NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu
} UvmEventEvictionInfo_V2;
// TODO: Bug 1870362: [uvm] Provide virtual address and processor index in
// AccessCounter events
@ -978,7 +1352,44 @@ typedef struct
NvU32 bank;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;
} UvmEventTestAccessCounterInfo_V1;
typedef struct
{
//
// eventType has to be the 1st argument of this structure.
// Setting eventType = UvmEventTypeAccessCounter helps to identify event
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;
NvU32 bank;
NvU32 padding32bits;
NvU64 address;
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo_V2;
typedef struct
{
@ -998,30 +1409,64 @@ typedef struct
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo cpuFault;
UvmEventMigrationInfo migration;
UvmEventGpuFaultInfo gpuFault;
UvmEventGpuFaultReplayInfo gpuFaultReplay;
UvmEventFatalFaultInfo fatalFault;
UvmEventReadDuplicateInfo readDuplicate;
UvmEventReadDuplicateInvalidateInfo readDuplicateInvalidate;
UvmEventPageSizeChangeInfo pageSizeChange;
UvmEventThrashingDetectedInfo thrashing;
UvmEventThrottlingStartInfo throttlingStart;
UvmEventThrottlingEndInfo throttlingEnd;
UvmEventMapRemoteInfo mapRemote;
UvmEventEvictionInfo eviction;
UvmEventCpuFaultInfo_V1 cpuFault;
UvmEventMigrationInfo_V1 migration;
UvmEventGpuFaultInfo_V1 gpuFault;
UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay;
UvmEventFatalFaultInfo_V1 fatalFault;
UvmEventReadDuplicateInfo_V1 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V1 pageSizeChange;
UvmEventThrashingDetectedInfo_V1 thrashing;
UvmEventThrottlingStartInfo_V1 throttlingStart;
UvmEventThrottlingEndInfo_V1 throttlingEnd;
UvmEventMapRemoteInfo_V1 mapRemote;
UvmEventEvictionInfo_V1 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo accessCounter;
UvmEventTestAccessCounterInfo_V1 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry;
} UvmEventEntry_V1;
typedef struct
{
union
{
union
{
NvU8 eventType;
UvmEventMigrationInfo_Lite migration_Lite;
UvmEventCpuFaultInfo_V2 cpuFault;
UvmEventMigrationInfo_V2 migration;
UvmEventGpuFaultInfo_V2 gpuFault;
UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay;
UvmEventFatalFaultInfo_V2 fatalFault;
UvmEventReadDuplicateInfo_V2 readDuplicate;
UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate;
UvmEventPageSizeChangeInfo_V2 pageSizeChange;
UvmEventThrashingDetectedInfo_V2 thrashing;
UvmEventThrottlingStartInfo_V2 throttlingStart;
UvmEventThrottlingEndInfo_V2 throttlingEnd;
UvmEventMapRemoteInfo_V2 mapRemote;
UvmEventEvictionInfo_V2 eviction;
} eventData;
union
{
NvU8 eventType;
UvmEventTestAccessCounterInfo_V2 accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry_V2;
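
To show how a consumer tells these entries apart, here is a minimal sketch that dispatches on the leading eventType byte of a V2 entry. It assumes the UVM tools header that defines UvmEventEntry_V2 and the UvmEventType* enumerators is included; handle_event_v2 is a hypothetical helper and only two of the event kinds are handled.

```c
#include <stdio.h>
/* Assumes the UVM tools header defining UvmEventEntry_V2 and the
 * UvmEventType* enumerators has been included. */

static void handle_event_v2(const UvmEventEntry_V2 *e)
{
    switch (e->eventData.eventType) {
    case UvmEventTypeGpuFault:
        /* gpuIndex identifies the faulting GPU directly in V2 entries. */
        printf("GPU %u fault at 0x%llx\n",
               (unsigned)e->eventData.gpuFault.gpuIndex,
               (unsigned long long)e->eventData.gpuFault.address);
        break;
    case UvmEventTypeMigration:
        printf("migration %u -> %u, %llu bytes\n",
               (unsigned)e->eventData.migration.srcIndex,
               (unsigned)e->eventData.migration.dstIndex,
               (unsigned long long)e->eventData.migration.migratedBytes);
        break;
    default:
        /* Other event kinds are decoded the same way from their V2 structs. */
        break;
    }
}
```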
//------------------------------------------------------------------------------
// Type of time stamp used in the event entry:
@ -1060,7 +1505,12 @@ typedef enum
UvmDebugAccessTypeWrite = 1,
} UvmDebugAccessType;
typedef struct UvmEventControlData_tag {
typedef enum {
UvmToolsEventQueueVersion_V1 = 1,
UvmToolsEventQueueVersion_V2 = 2,
} UvmToolsEventQueueVersion;
typedef struct UvmEventControlData_V1_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
@ -1070,7 +1520,30 @@ typedef struct UvmEventControlData_tag {
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData;
} UvmToolsEventControlData_V1;
typedef struct UvmEventControlData_V2_tag {
// entries between get_ahead and get_behind are currently being read
volatile NvU32 get_ahead;
volatile NvU32 get_behind;
// entries between put_ahead and put_behind are currently being written
volatile NvU32 put_ahead;
volatile NvU32 put_behind;
// The version values are limited to UvmToolsEventQueueVersion and
// initialized by UvmToolsCreateEventQueue().
NvU32 version;
NvU32 padding32Bits;
// counter of dropped events
NvU64 dropped[UvmEventNumTypesAll];
} UvmToolsEventControlData_V2;
// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
typedef UvmToolsEventControlData_V1 UvmToolsEventControlData;
typedef UvmEventEntry_V1 UvmEventEntry;
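
A related sketch: selecting the entry layout from the control structure. This assumes the queue was created as a V2 queue by UvmToolsCreateEventQueue(), since a V1 control structure has no version field to inspect; event_entry_size is a hypothetical helper.

```c
#include <stddef.h>
/* Assumes the UVM tools header defining the control data and entry types. */

static size_t event_entry_size(const UvmToolsEventControlData_V2 *ctrl)
{
    /* The version field is written by UvmToolsCreateEventQueue(), so this
     * check is only meaningful for queues created with the V2+ layout. */
    if (ctrl->version == UvmToolsEventQueueVersion_V2)
        return sizeof(UvmEventEntry_V2);

    return sizeof(UvmEventEntry_V1);
}
```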
//------------------------------------------------------------------------------
// UVM Tools forward types (handles) definitions

File diff suppressed because it is too large

View File

@ -706,11 +706,6 @@ void uvm_va_block_context_free(uvm_va_block_context_t *va_block_context);
// mm is used to initialize the value of va_block_context->mm. NULL is allowed.
void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context, struct mm_struct *mm);
// Return the preferred NUMA node ID for the block's policy.
// If the preferred node ID is NUMA_NO_NODE, the current NUMA node ID
// is returned.
int uvm_va_block_context_get_node(uvm_va_block_context_t *va_block_context);
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
// and page masks could simplify the below APIs and their implementations
// at the cost of having to scan the whole mask for small regions.
@ -1546,7 +1541,11 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
// The [src, src + size) range has to fit within a single PAGE_SIZE page.
//
// LOCKING: The caller must hold the va_block lock
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block, uvm_mem_t *dst, NvU64 src, size_t size);
NV_STATUS uvm_va_block_read_to_cpu(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_mem_t *dst,
NvU64 src,
size_t size);
// Initialize va block retry tracking
void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
@ -2090,11 +2089,14 @@ void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
// Count how many processors have a copy of the given page resident in their
// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index);
// Get the processor with a resident copy of a page closest to the given
// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor);
@ -2127,6 +2129,11 @@ uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
int nid,
uvm_page_index_t page_index);
// Return the CPU chunk for the given page_index from the first available NUMA
// node in the va_block. Should only be called for HMM va_blocks.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_any_chunk_for_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Return the struct page * from the chunk corresponding to the given page_index
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
@ -2241,6 +2248,7 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
// Return the maximum mapping protection for processor_id that will not require
// any permission revocation on the rest of the processors.
uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index);

View File

@ -175,6 +175,14 @@ typedef struct
// Scratch node mask. This follows the same rules as scratch_page_mask.
nodemask_t scratch_node_mask;
// Available as scratch space for the internal APIs. This is like a caller-
// save register: it shouldn't be used across function calls which also take
// this va_block_context.
uvm_processor_mask_t scratch_processor_mask;
// Temporary mask in block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// State used by uvm_va_block_make_resident
struct uvm_make_resident_context_struct
{
@ -233,6 +241,16 @@ typedef struct
// are removed as the operation progresses.
uvm_page_mask_t revoke_running_page_mask;
// Mask used by block_gpu_split_2m and block_gpu_split_big to track
// splitting of big PTEs but they are never called concurrently. This
// mask can be used concurrently with other page masks.
uvm_page_mask_t big_split_page_mask;
// Mask used by block_unmap_gpu to track non_uvm_lite_gpus which have
// this block mapped. This mask can be used concurrently with other page
// masks.
uvm_processor_mask_t non_uvm_lite_gpus;
uvm_page_mask_t page_mask;
uvm_page_mask_t filtered_page_mask;
uvm_page_mask_t migratable_mask;
@ -276,6 +294,10 @@ typedef struct
struct vm_area_struct *vma;
#if UVM_IS_CONFIG_HMM()
// Temporary mask used in uvm_hmm_block_add_eviction_mappings().
uvm_processor_mask_t map_processors_eviction;
// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
#endif
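
The caller-save rule for the scratch masks above is easy to get wrong, so here is a minimal sketch of the convention. example_scratch_user and helper_that_takes_context are hypothetical names; only the lifetime rule is illustrated, under the assumption that the UVM headers defining these types are included.

```c
/* Hypothetical callee that also receives the same va_block_context. */
static void helper_that_takes_context(uvm_va_block_t *block, uvm_va_block_context_t *ctx);

static void example_scratch_user(uvm_va_block_t *block, uvm_va_block_context_t *ctx)
{
    uvm_processor_mask_t *scratch = &ctx->scratch_processor_mask;

    /* Fine: use the scratch mask freely within this function. */
    /* ... populate 'scratch' with the uvm_processor_mask_* helpers ... */

    /* Any callee that also receives 'ctx' may reuse the scratch mask. */
    helper_that_takes_context(block, ctx);

    /* Wrong: the contents of 'scratch' computed before the call above can
     * no longer be relied on; recompute them here if they are still needed. */
}
```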

View File

@ -1799,7 +1799,7 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (uvm_api_range_invalid(params->base, params->length))
return NV_ERR_INVALID_ADDRESS;
if (params->gpuAttributesCount > UVM_MAX_GPUS)
if (params->gpuAttributesCount > UVM_MAX_GPUS_V2)
return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.conf_computing_enabled && params->gpuAttributesCount == 0)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -86,11 +86,13 @@ static void init_tools_data(uvm_va_space_t *va_space)
for (i = 0; i < ARRAY_SIZE(va_space->tools.counters); i++)
INIT_LIST_HEAD(va_space->tools.counters + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues); i++)
INIT_LIST_HEAD(va_space->tools.queues + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v1); i++)
INIT_LIST_HEAD(va_space->tools.queues_v1 + i);
for (i = 0; i < ARRAY_SIZE(va_space->tools.queues_v2); i++)
INIT_LIST_HEAD(va_space->tools.queues_v2 + i);
}
static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
{
uvm_gpu_t *other_gpu;
@ -104,7 +106,7 @@ static NV_STATUS register_gpu_nvlink_peers(uvm_va_space_t *va_space, uvm_gpu_t *
peer_caps = uvm_gpu_peer_caps(gpu, other_gpu);
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1) {
if (peer_caps->link_type >= UVM_GPU_LINK_NVLINK_1 || gpu->parent == other_gpu->parent) {
NV_STATUS status = enable_peers(va_space, gpu, other_gpu);
if (status != NV_OK)
return status;
@ -324,10 +326,16 @@ static void unregister_gpu(uvm_va_space_t *va_space,
}
}
if (gpu->parent->isr.replayable_faults.handling)
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_clear(&va_space->non_faultable_processors, gpu->id);
}
processor_mask_array_clear(va_space->can_access, gpu->id, gpu->id);
processor_mask_array_clear(va_space->can_access, gpu->id, UVM_ID_CPU);
@ -514,7 +522,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
}
// Check that all CPU/GPU affinity masks are empty
@ -604,7 +612,7 @@ uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProces
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
if (uvm_uuid_eq(uvm_gpu_uuid(gpu), gpu_uuid))
if (uvm_uuid_eq(&gpu->uuid, gpu_uuid))
return gpu;
}
@ -663,7 +671,8 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
NvBool *numa_enabled,
NvS32 *numa_node_id)
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out)
{
NV_STATUS status;
uvm_va_range_t *va_range;
@ -675,13 +684,15 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
if (status != NV_OK)
return status;
uvm_uuid_copy(uuid_out, &gpu->uuid);
// Enabling access counters requires taking the ISR lock, so it is done
// without holding the (deeper order) VA space lock. Enabling the counters
// after dropping the VA space lock would create a window of time in which
// another thread could see the GPU as registered, but access counters would
// be disabled. Therefore, the counters are enabled before taking the VA
// space lock.
if (uvm_gpu_access_counters_required(gpu->parent)) {
if (uvm_parent_gpu_access_counters_required(gpu->parent)) {
status = uvm_gpu_access_counters_enable(gpu, va_space);
if (status != NV_OK) {
uvm_gpu_release(gpu);
@ -726,10 +737,17 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
if (gpu->parent->isr.replayable_faults.handling) {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->faultable_processors, gpu->id);
UVM_ASSERT(!uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
// System-wide atomics are enabled by default
uvm_processor_mask_set(&va_space->system_wide_atomics_enabled_processors, gpu->id);
}
else {
UVM_ASSERT(!uvm_processor_mask_test(&va_space->non_faultable_processors, gpu->id));
uvm_processor_mask_set(&va_space->non_faultable_processors, gpu->id);
}
// All GPUs have native atomics on their own memory
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
@ -785,7 +803,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
}
}
status = register_gpu_nvlink_peers(va_space, gpu);
status = register_gpu_peers(va_space, gpu);
if (status != NV_OK)
goto cleanup;
@ -822,9 +840,9 @@ done:
if (status != NV_OK) {
// There is no risk of disabling access counters on a previously
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registed.
if (uvm_gpu_access_counters_required(gpu->parent))
uvm_gpu_access_counters_disable(gpu, va_space);
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_release(gpu);
}
@ -876,15 +894,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// it from the VA space until we're done.
uvm_va_space_up_read_rm(va_space);
// If uvm_gpu_access_counters_required(gpu->parent) is true, a concurrent
// registration could enable access counters after they are disabled here.
// If uvm_parent_gpu_access_counters_required(gpu->parent) is true, a
// concurrent registration could enable access counters after they are
// disabled here.
// The concurrent registration will fail later on if it acquires the VA
// space lock before the unregistration does (because the GPU is still
// registered) and undo the access counters enablement, or succeed if it
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (gpu->parent->access_counters_supported)
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@ -1040,6 +1059,10 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
processor_mask_array_set(va_space->indirect_peers, gpu1->id, gpu0->id);
}
}
else if (gpu0->parent == gpu1->parent) {
processor_mask_array_set(va_space->has_native_atomics, gpu0->id, gpu1->id);
processor_mask_array_set(va_space->has_native_atomics, gpu1->id, gpu0->id);
}
UVM_ASSERT(va_space_check_processors_masks(va_space));
__set_bit(table_index, va_space->enabled_peers);
@ -1091,6 +1114,7 @@ static NV_STATUS retain_pcie_peers_from_uuids(uvm_va_space_t *va_space,
static bool uvm_va_space_pcie_peer_enabled(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
return !processor_mask_array_test(va_space->has_nvlink, gpu0->id, gpu1->id) &&
gpu0->parent != gpu1->parent &&
uvm_va_space_peer_enabled(va_space, gpu0, gpu1);
}

View File

@ -163,6 +163,10 @@ struct uvm_va_space_struct
// faults.
uvm_processor_mask_t faultable_processors;
// Mask of processors registered with the va space that don't support
// faulting.
uvm_processor_mask_t non_faultable_processors;
// This is a count of non fault capable processors with a GPU VA space
// registered.
NvU32 num_non_faultable_gpu_va_spaces;
@ -261,8 +265,8 @@ struct uvm_va_space_struct
// Mask of processors that are participating in system-wide atomics
uvm_processor_mask_t system_wide_atomics_enabled_processors;
// Mask of GPUs where access counters are enabled on this VA space
uvm_processor_mask_t access_counters_enabled_processors;
// Mask of physical GPUs where access counters are enabled on this VA space
uvm_parent_processor_mask_t access_counters_enabled_processors;
// Array with information regarding CPU/GPU NUMA affinity. There is one
// entry per CPU NUMA node. Entries in the array are populated sequentially
@ -308,7 +312,8 @@ struct uvm_va_space_struct
// Lists of counters listening for events on this VA space
struct list_head counters[UVM_TOTAL_COUNTERS];
struct list_head queues[UvmEventNumTypesAll];
struct list_head queues_v1[UvmEventNumTypesAll];
struct list_head queues_v2[UvmEventNumTypesAll];
// Node for this va_space in global subscribers list
struct list_head node;
@ -399,7 +404,7 @@ static void uvm_va_space_processor_uuid(uvm_va_space_t *va_space, NvProcessorUui
else {
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
UVM_ASSERT(gpu);
memcpy(uuid, uvm_gpu_uuid(gpu), sizeof(*uuid));
memcpy(uuid, &gpu->uuid, sizeof(*uuid));
}
}
@ -472,9 +477,9 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space);
uvm_mutex_unlock(&(__va_space)->serialize_writers_lock); \
} while (0)
// Get a registered gpu by uuid. This restricts the search for GPUs, to those that
// have been registered with a va_space. This returns NULL if the GPU is not present, or not
// registered with the va_space.
// Get a registered gpu by uuid. This restricts the search for GPUs, to those
// that have been registered with a va_space. This returns NULL if the GPU is
// not present, or not registered with the va_space.
//
// LOCKING: The VA space lock must be held.
uvm_gpu_t *uvm_va_space_get_gpu_by_uuid(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);
@ -501,13 +506,19 @@ bool uvm_va_space_can_read_duplicate(uvm_va_space_t *va_space, uvm_gpu_t *changi
// Register a gpu in the va space
// Note that each gpu can be only registered once in a va space
//
// The input gpu_uuid is for the physical GPU. The user_rm_va_space argument
// identifies the SMC partition if provided and SMC is enabled.
//
// This call returns whether the GPU memory is a NUMA node in the kernel and the
// corresponding node id.
// It also returns the GI UUID (if gpu_uuid is an SMC partition) or a copy of
// gpu_uuid if the GPU is not SMC capable or SMC is not enabled.
NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_va_space,
NvBool *numa_enabled,
NvS32 *numa_node_id);
NvS32 *numa_node_id,
NvProcessorUuid *uuid_out);
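
A minimal sketch of a call site updated for the new out parameter; the surrounding variables (params, user_rm_va_space, status) are illustrative rather than taken from the actual ioctl handler.

```c
/* Hypothetical call site: register a GPU and capture the UUID the VA space
 * will use for it (the GI UUID for SMC partitions, otherwise a copy of the
 * physical GPU UUID). */
NvProcessorUuid registered_uuid;
NvBool numa_enabled;
NvS32 numa_node_id;
NV_STATUS status;

status = uvm_va_space_register_gpu(va_space,
                                   &params->gpu_uuid,
                                   &user_rm_va_space,
                                   &numa_enabled,
                                   &numa_node_id,
                                   &registered_uuid);
if (status != NV_OK)
    return status;
```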
// Unregister a gpu from the va space
NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcessorUuid *gpu_uuid);

View File

@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
}
}
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
uvm_va_space_pageable_mem_access_supported(va_space)) {
#if UVM_CAN_USE_MMU_NOTIFIERS()
// Initialize MMU interval notifiers for this process. This allows
// mmu_interval_notifier_insert() to be called without holding the

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -53,6 +53,7 @@
(defined(CONFIG_CRYPTO_HMAC) || defined(CONFIG_CRYPTO_HMAC_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDH) || defined(CONFIG_CRYPTO_ECDH_MODULE)) && \
(defined(CONFIG_CRYPTO_ECDSA) || defined(CONFIG_CRYPTO_ECDSA_MODULE)) && \
(defined(CONFIG_CRYPTO_RSA) || defined(CONFIG_CRYPTO_RSA_MODULE)) && \
(defined(CONFIG_X509_CERTIFICATE_PARSER) || defined(CONFIG_X509_CERTIFICATE_PARSER_MODULE))
#define NV_CONFIG_CRYPTO_PRESENT 1
#endif
@ -151,4 +152,17 @@ bool lkca_ec_compute_key(void *ec_context, const uint8_t *peer_public,
bool lkca_ecdsa_verify(void *ec_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size);
bool lkca_rsa_pkcs1_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
bool lkca_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size);
#endif

View File

@ -0,0 +1,611 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
#ifdef USE_LKCA
#include <linux/module.h>
#include <linux/mpi.h>
#include <linux/random.h>
#include <crypto/akcipher.h>
#include <crypto/internal/rsa.h>
/* ------------------------ Macros & Defines ------------------------------- */
#define GET_MOST_SIGNIFICANT_BIT(keySize) (keySize > 0 ? ((keySize - 1) & 7) : 0)
#define GET_ENC_MESSAGE_SIZE_BYTE(keySize) (((keySize) + 7) >> 3)
#define PKCS1_MGF1_COUNTER_SIZE_BYTE (4)
#define RSA_PSS_PADDING_ZEROS_SIZE_BYTE (8)
#define RSA_PSS_TRAILER_FIELD (0xbc)
#define SHIFT_RIGHT_AND_GET_BYTE(val, x) ((val >> x) & 0xFF)
#define BITS_TO_BYTES(b) (b >> 3)
static const unsigned char zeroes[RSA_PSS_PADDING_ZEROS_SIZE_BYTE] = { 0 };
struct rsa_ctx
{
struct rsa_key key;
bool pub_key_set;
bool priv_key_set;
int size;
};
#endif // #ifdef USE_LKCA
/*!
* Creating and initializing a RSA context.
*
* @return : A void pointer points to a RSA context
*
*/
void *libspdm_rsa_new
(
void
)
{
#ifndef USE_LKCA
return NULL;
#else
struct rsa_ctx *ctx;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
{
return NULL;
}
memset(ctx, 0, sizeof(*ctx));
ctx->pub_key_set = false;
ctx->priv_key_set = false;
return ctx;
#endif
}
/*!
* To free a RSA context.
*
* @param rsa_context : A RSA context pointer
*
*/
void libspdm_rsa_free
(
void *rsa_context
)
{
#ifdef USE_LKCA
struct rsa_ctx *ctx = rsa_context;
if (ctx != NULL)
{
if (ctx->key.n) kfree(ctx->key.n);
if (ctx->key.e) kfree(ctx->key.e);
if (ctx->key.d) kfree(ctx->key.d);
if (ctx->key.q) kfree(ctx->key.q);
if (ctx->key.p) kfree(ctx->key.p);
if (ctx->key.dq) kfree(ctx->key.dq);
if (ctx->key.dp) kfree(ctx->key.dp);
if (ctx->key.qinv) kfree(ctx->key.qinv);
kfree(ctx);
}
#endif
}
#define rsa_set_key_case(a, a_sz, A) \
case A: \
{ \
if (ctx->key.a) { \
kfree(ctx->key.a); \
} \
ctx->key.a = shadow_num; \
ctx->key.a_sz = bn_size; \
break; \
}
/*!
* To set key into RSA context.
*
* @param rsa_context : A RSA context pointer
* @param key_tag : Indicate key tag for RSA key
* @param big_number : A big nuMber buffer to store rsa KEY
* @param bn_size : The size of bug number
*
* @Return : True if OK; otherwise return False
*/
bool libspdm_rsa_set_key
(
void *rsa_context,
const libspdm_rsa_key_tag_t key_tag,
const uint8_t *big_number,
size_t bn_size
)
{
#ifndef USE_LKCA
return false;
#else
struct rsa_ctx *ctx = rsa_context;
uint8_t *shadow_num;
if (ctx == NULL)
{
return false;
}
// Quick sanity check if tag is valid
switch (key_tag)
{
case LIBSPDM_RSA_KEY_N:
case LIBSPDM_RSA_KEY_E:
case LIBSPDM_RSA_KEY_D:
case LIBSPDM_RSA_KEY_Q:
case LIBSPDM_RSA_KEY_P:
case LIBSPDM_RSA_KEY_DP:
case LIBSPDM_RSA_KEY_DQ:
case LIBSPDM_RSA_KEY_Q_INV:
break;
default:
return false;
break;
}
if (big_number != NULL)
{
shadow_num = kmalloc(bn_size, GFP_KERNEL);
if (shadow_num == NULL)
{
return false;
}
memcpy(shadow_num, big_number, bn_size);
}
else
{
shadow_num = NULL;
bn_size = 0;
}
switch (key_tag)
{
rsa_set_key_case(n, n_sz, LIBSPDM_RSA_KEY_N)
rsa_set_key_case(e, e_sz, LIBSPDM_RSA_KEY_E)
rsa_set_key_case(d, d_sz, LIBSPDM_RSA_KEY_D)
rsa_set_key_case(q, q_sz, LIBSPDM_RSA_KEY_Q)
rsa_set_key_case(p, p_sz, LIBSPDM_RSA_KEY_P)
rsa_set_key_case(dq, dq_sz, LIBSPDM_RSA_KEY_DQ)
rsa_set_key_case(dp, dp_sz, LIBSPDM_RSA_KEY_DP)
rsa_set_key_case(qinv, qinv_sz, LIBSPDM_RSA_KEY_Q_INV)
default:
// We can't get here ever
break;
}
return true;
#endif
}
/*!
* Perform PKCS1 MGF1 operation.
*
* @param mask : A mask pointer to store return data
* @param maskedDB_length : Indicate mask data block length
* @param seed : A seed pointer to store random values
* @param seed_length : The seed length
* @param hash_nid : The hash NID
*
* @Return : True if OK; otherwise return False
*/
static bool NV_PKCS1_MGF1
(
uint8_t *mask,
size_t maskedDB_length,
const uint8_t *seed,
size_t seed_length,
size_t hash_nid
)
{
#ifndef USE_LKCA
return false;
#else
size_t mdLength;
size_t counter;
size_t outLength;
uint8_t counterBuf[4];
void *sha384_ctx = NULL;
uint8_t hash_value[LIBSPDM_SHA384_DIGEST_SIZE];
bool status = false;
if (mask == NULL || seed == NULL)
{
return false;
}
// Only support SHA384 for MGF1 now.
if (hash_nid == LIBSPDM_CRYPTO_NID_SHA384)
{
mdLength = LIBSPDM_SHA384_DIGEST_SIZE;
}
else
{
return false;
}
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed \n", __FUNCTION__);
return false;
}
for (counter = 0, outLength = 0; outLength < maskedDB_length; counter++)
{
counterBuf[0] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 24);
counterBuf[1] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 16);
counterBuf[2] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 8);
counterBuf[3] = (uint8_t)SHIFT_RIGHT_AND_GET_BYTE(counter, 0);
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s: libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, seed, seed_length);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(seed) !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, counterBuf, 4);
if (!status)
{
pr_err("%s: libspdm_sha384_update() failed(counterBuf) !! \n", __FUNCTION__);
goto _error_exit;
}
if (outLength + mdLength <= maskedDB_length)
{
status = libspdm_sha384_final(sha384_ctx, mask + outLength);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed (<= maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
outLength += mdLength;
}
else
{
status = libspdm_sha384_final(sha384_ctx, hash_value);
if (!status)
{
pr_err("%s: libspdm_sha384_final() failed(> maskedDB_length) !! \n", __FUNCTION__);
goto _error_exit;
}
memcpy(mask + outLength, hash_value, maskedDB_length - outLength);
outLength = maskedDB_length;
}
}
status = true;
_error_exit:
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
/*
0xbc : Trailer Field
+-----------+
| M |
+-----------+
|
V
Hash
|
V
+--------+----------+----------+
M' = |Padding1| mHash | salt |
+--------+----------+----------+
|--------------|---------------|
|
+--------+----------+ V
DB = |Padding2| salt | Hash
+--------+----------+ |
| |
V |
xor <--- MGF <---|
| |
| |
V V
+-------------------+----------+----+
EM = | maskedDB | H |0xbc|
+-------------------+----------+----+
salt : The random number; its size is hardcoded to the hash size here.
M' : The concatenation of padding1 + message hash + salt
MGF : Mask generation function.
A mask generation function takes an octet string of variable length
and a desired output length as input, and outputs an octet string of
the desired length
MGF1 is a Mask Generation Function based on a hash function.
Padding1 : 8 zeros
Padding2 : 0x01
The detailed spec is at https://datatracker.ietf.org/doc/html/rfc2437
*/
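/*
 * Worked size example (a sketch, assuming SHA-384 and a salt length equal to
 * the digest size, as enforced below): for a 3072-bit modulus,
 *
 *     emLength        = 3072 / 8            = 384 bytes
 *     maskedDB_length = emLength - 48 - 1   = 335 bytes
 *     EM              = maskedDB (335) || H (48) || 0xbc (1)
 *
 * ignoring the one-byte adjustment made when msBits is zero.
 */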
/*!
* Perform EMSA-PSS encoding (using NV_PKCS1_MGF1) and the RSA private-key operation to generate the signature.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size : The size of the message hash in bytes
* @param signature : The pointer used to store the generated signature
* @param sig_size : On input, a pointer storing the signature buffer size.
* On output, a pointer storing the generated signature size.
* @param salt_length : The salt length for the RSA-PSS algorithm
*
* @Return : True if OK; otherwise return False
*/
static bool nvRsaPaddingAddPkcs1PssMgf1
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size,
int salt_length
)
{
#ifndef USE_LKCA
return false;
#else
bool status = false;
struct rsa_ctx *ctx = rsa_context;
void *sha384_ctx = NULL;
uint32_t keySize;
uint32_t msBits;
size_t emLength;
uint8_t saltBuf[64];
size_t maskedDB_length;
size_t i;
uint8_t *tmp_H;
uint8_t *tmp_P;
int rc;
unsigned int ret_data_size;
MPI mpi_n = NULL;
MPI mpi_d = NULL;
MPI mpi_c = mpi_alloc(0);
MPI mpi_p = mpi_alloc(0);
// read modulus to BN struct
mpi_n = mpi_read_raw_data(ctx->key.n, ctx->key.n_sz);
if (mpi_n == NULL)
{
pr_err("%s : mpi_n create failed !! \n", __FUNCTION__);
goto _error_exit;
}
// read private exponent to BN struct
mpi_d = mpi_read_raw_data(ctx->key.d, ctx->key.d_sz);
if (mpi_d == NULL)
{
pr_err("%s : mpi_d create failed !! \n", __FUNCTION__);
goto _error_exit;
}
keySize = mpi_n->nbits;
msBits = GET_MOST_SIGNIFICANT_BIT(keySize);
emLength = BITS_TO_BYTES(keySize);
if (msBits == 0)
{
*signature++ = 0;
emLength--;
}
if (emLength < hash_size + 2)
{
pr_err("%s : emLength < hash_size + 2 !! \n", __FUNCTION__);
goto _error_exit;
}
// Now, we only support salt_length == LIBSPDM_SHA384_DIGEST_SIZE
if (salt_length != LIBSPDM_SHA384_DIGEST_SIZE ||
hash_nid != LIBSPDM_CRYPTO_NID_SHA384)
{
pr_err("%s : Invalid salt_length (%x) \n", __FUNCTION__, salt_length);
goto _error_exit;
}
get_random_bytes(saltBuf, salt_length);
maskedDB_length = emLength - hash_size - 1;
tmp_H = signature + maskedDB_length;
sha384_ctx = libspdm_sha384_new();
if (sha384_ctx == NULL)
{
pr_err("%s : libspdm_sha384_new() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_init(sha384_ctx);
if (!status)
{
pr_err("%s : libspdm_sha384_init() failed !! \n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, zeroes, sizeof(zeroes));
if (!status)
{
pr_err("%s : libspdm_sha384_update() with zeros failed !!\n", __FUNCTION__);
goto _error_exit;
}
status = libspdm_sha384_update(sha384_ctx, message_hash, hash_size);
if (!status)
{
pr_err("%s: libspdm_sha384_update() with message_hash failed !!\n", __FUNCTION__);
goto _error_exit;
}
if (salt_length)
{
status = libspdm_sha384_update(sha384_ctx, saltBuf, salt_length);
if (!status)
{
pr_err("%s : libspdm_sha384_update() with saltBuf failed !!\n", __FUNCTION__);
goto _error_exit;
}
}
status = libspdm_sha384_final(sha384_ctx, tmp_H);
if (!status)
{
pr_err("%s : libspdm_sha384_final() with tmp_H failed !!\n", __FUNCTION__);
goto _error_exit;
}
/* Generate dbMask in place then perform XOR on it */
status = NV_PKCS1_MGF1(signature, maskedDB_length, tmp_H, hash_size, hash_nid);
if (!status)
{
pr_err("%s : NV_PKCS1_MGF1() failed \n", __FUNCTION__);
goto _error_exit;
}
tmp_P = signature;
tmp_P += emLength - salt_length - hash_size - 2;
*tmp_P++ ^= 0x1;
if (salt_length > 0)
{
for (i = 0; i < salt_length; i++)
{
*tmp_P++ ^= saltBuf[i];
}
}
if (msBits)
{
signature[0] &= 0xFF >> (8 - msBits);
}
/* H is already in place so just set final 0xbc */
signature[emLength - 1] = RSA_PSS_TRAILER_FIELD;
// read signature to BN struct
mpi_p = mpi_read_raw_data(signature, emLength);
if (mpi_p == NULL)
{
pr_err("%s : mpi_p() create failed !!\n", __FUNCTION__);
goto _error_exit;
}
// Start the RSA private-key operation (modular exponentiation) over the encoded message.
rc = mpi_powm(mpi_c, mpi_p, mpi_d, mpi_n);
if (rc != 0)
{
pr_err("%s : mpi_powm() failed \n", __FUNCTION__);
goto _error_exit;
}
rc = mpi_read_buffer(mpi_c, signature, *sig_size, &ret_data_size, NULL);
if (rc != 0)
{
pr_err("%s : mpi_read_buffer() failed \n", __FUNCTION__);
goto _error_exit;
}
if (ret_data_size > *sig_size)
{
goto _error_exit;
}
*sig_size = ret_data_size;
status = true;
_error_exit:
mpi_free(mpi_n);
mpi_free(mpi_d);
mpi_free(mpi_c);
mpi_free(mpi_p);
libspdm_sha384_free(sha384_ctx);
return status;
#endif
}
/*!
* Perform the RSA-PSS signature signing process with the LKCA library.
*
* @param rsa_context : A RSA context pointer
* @param hash_nid : The hash NID
* @param message_hash : The pointer to the message hash
* @param hash_size : The size of the message hash in bytes
* @param signature : The pointer used to store the generated signature
* @param sig_size : On input, a pointer storing the signature buffer size.
* On output, a pointer storing the generated signature size.
*
* @Return : True if OK; otherwise return False
*/
bool lkca_rsa_pss_sign
(
void *rsa_context,
size_t hash_nid,
const uint8_t *message_hash,
size_t hash_size,
uint8_t *signature,
size_t *sig_size
)
{
#ifndef USE_LKCA
return true;
#else
return nvRsaPaddingAddPkcs1PssMgf1(rsa_context,
hash_nid,
message_hash,
hash_size,
signature,
sig_size,
LIBSPDM_SHA384_DIGEST_SIZE);
#endif
}
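/*
 * Illustrative call sequence (hypothetical caller, not part of this file):
 * sign a SHA-384 digest with a context previously populated via
 * libspdm_rsa_set_key(). The signature buffer size is an assumption.
 */
#if 0
static bool example_sign_digest(void *rsa_ctx,
                                const uint8_t digest[LIBSPDM_SHA384_DIGEST_SIZE])
{
    uint8_t sig[512];                /* assumed large enough for the modulus */
    size_t  sig_size = sizeof(sig);

    return lkca_rsa_pss_sign(rsa_ctx, LIBSPDM_CRYPTO_NID_SHA384,
                             digest, LIBSPDM_SHA384_DIGEST_SIZE,
                             sig, &sig_size);
}
#endif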

View File

@ -0,0 +1,85 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Comments, prototypes and checks taken from DMTF: Copyright 2021-2022 DMTF. All rights reserved.
* License: BSD 3-Clause License. For full text see link: https://github.com/DMTF/libspdm/blob/main/LICENSE.md
*/
/** @file
* RSA Asymmetric Cipher Wrapper Implementation.
*
* This file implements following APIs which provide more capabilities for RSA:
* 1) rsa_pss_sign
*
* RFC 8017 - PKCS #1: RSA Cryptography Specifications version 2.2
**/
#include "internal_crypt_lib.h"
#include "library/cryptlib.h"
/**
* Carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme.
*
* This function carries out the RSA-PSS signature generation with EMSA-PSS encoding scheme defined in
* RSA PKCS#1 v2.2.
*
* The salt length is the same as the digest length.
*
* If the signature buffer is too small to hold the contents of signature, false
* is returned and sig_size is set to the required buffer size to obtain the signature.
*
* If rsa_context is NULL, then return false.
* If message_hash is NULL, then return false.
* hash_size must match the hash_nid. The NID can be SHA256, SHA384, SHA512, SHA3_256, SHA3_384 or SHA3_512.
* If sig_size is large enough but signature is NULL, then return false.
*
* @param[in] rsa_context Pointer to RSA context for signature generation.
* @param[in] hash_nid hash NID
* @param[in] message_hash Pointer to octet message hash to be signed.
* @param[in] hash_size size of the message hash in bytes.
* @param[out] signature Pointer to buffer to receive RSA-SSA PSS signature.
* @param[in, out] sig_size On input, the size of signature buffer in bytes.
* On output, the size of data returned in signature buffer in bytes.
*
* @retval true signature successfully generated in RSA-SSA PSS.
* @retval false signature generation failed.
* @retval false sig_size is too small.
*
**/
bool libspdm_rsa_pss_sign(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
uint8_t *signature, size_t *sig_size)
{
return lkca_rsa_pss_sign(rsa_context, hash_nid, message_hash, hash_size,
signature, sig_size);
}
//
// In RM, we only need the sign process, so we stub out the verification function.
// The verification function is needed in GSP code only.
//
bool libspdm_rsa_pss_verify(void *rsa_context, size_t hash_nid,
const uint8_t *message_hash, size_t hash_size,
const uint8_t *signature, size_t sig_size)
{
return false;
}

View File

@ -0,0 +1,153 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "nv-linux.h"
extern int NVreg_ImexChannelCount;
static int nv_caps_imex_open(struct inode *inode, struct file *file)
{
return 0;
}
static int nv_caps_imex_release(struct inode *inode, struct file *file)
{
return 0;
}
static struct file_operations g_nv_caps_imex_fops =
{
.owner = THIS_MODULE,
.open = nv_caps_imex_open,
.release = nv_caps_imex_release
};
struct
{
NvBool initialized;
struct cdev cdev;
dev_t devno;
} g_nv_caps_imex;
int NV_API_CALL nv_caps_imex_channel_get(int fd)
{
#if NV_FILESYSTEM_ACCESS_AVAILABLE
struct file *file;
struct inode *inode;
int channel = -1;
file = fget(fd);
if (file == NULL)
{
return channel;
}
inode = NV_FILE_INODE(file);
if (inode == NULL)
{
goto out;
}
/* Make sure the fd belongs to the nv-caps-imex-drv */
if (file->f_op != &g_nv_caps_imex_fops)
{
goto out;
}
/* minor number is same as channel */
channel = MINOR(inode->i_rdev);
out:
fput(file);
return channel;
#else
return -1;
#endif
}
int NV_API_CALL nv_caps_imex_channel_count(void)
{
return NVreg_ImexChannelCount;
}
int NV_API_CALL nv_caps_imex_init(void)
{
int rc;
if (g_nv_caps_imex.initialized)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex is already initialized.\n");
return -EBUSY;
}
if (NVreg_ImexChannelCount == 0)
{
nv_printf(NV_DBG_INFO, "nv-caps-imex is disabled.\n");
return 0;
}
rc = alloc_chrdev_region(&g_nv_caps_imex.devno, 0,
NVreg_ImexChannelCount,
"nvidia-caps-imex-channels");
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to create cdev.\n");
return rc;
}
cdev_init(&g_nv_caps_imex.cdev, &g_nv_caps_imex_fops);
g_nv_caps_imex.cdev.owner = THIS_MODULE;
rc = cdev_add(&g_nv_caps_imex.cdev, g_nv_caps_imex.devno,
NVreg_ImexChannelCount);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "nv-caps-imex failed to add cdev.\n");
goto cdev_add_fail;
}
g_nv_caps_imex.initialized = NV_TRUE;
return 0;
cdev_add_fail:
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
return rc;
}
void NV_API_CALL nv_caps_imex_exit(void)
{
if (!g_nv_caps_imex.initialized)
{
return;
}
cdev_del(&g_nv_caps_imex.cdev);
unregister_chrdev_region(g_nv_caps_imex.devno, NVreg_ImexChannelCount);
g_nv_caps_imex.initialized = NV_FALSE;
}
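/*
 * Illustrative flow (hypothetical, outside this driver): a CUDA process opens
 * one of the administrator-created channel nodes and hands the resulting fd
 * to the driver, which resolves it back to a channel number (the node's
 * minor) through nv_caps_imex_channel_get(), e.g.
 *
 *     int fd = open("/dev/nvidia-caps-imex-channels/channel0", O_RDWR);
 *     // in-kernel: nv_caps_imex_channel_get(fd) == 0
 */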

View File

@ -0,0 +1,34 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV_CAPS_IMEX_H_
#define _NV_CAPS_IMEX_H_
#include <nv-kernel-interface-api.h>
int NV_API_CALL nv_caps_imex_init(void);
void NV_API_CALL nv_caps_imex_exit(void);
int NV_API_CALL nv_caps_imex_channel_get(int fd);
int NV_API_CALL nv_caps_imex_channel_count(void);
#endif /* _NV_CAPS_IMEX_H_ */

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -577,12 +577,9 @@ int nvidia_mmap_helper(
//
// This path is similar to the sysmem mapping code.
// TODO: Refactor is needed as part of bug#2001704.
// Use pfn_valid to determine whether the physical address has
// backing struct page. This is used to isolate P8 from P9.
//
if ((nv_get_numa_status(nvl) == NV_NUMA_STATUS_ONLINE) &&
!IS_REG_OFFSET(nv, access_start, access_len) &&
(pfn_valid(PFN_DOWN(mmap_start))))
!IS_REG_OFFSET(nv, access_start, access_len))
{
ret = nvidia_mmap_numa(vma, mmap_context);
if (ret)

View File

@ -839,6 +839,45 @@
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
/*
* Option: NVreg_ImexChannelCount
*
* Description:
*
* This option allows users to specify the number of IMEX (import/export)
* channels. Within an IMEX domain, the channels allow sharing memory
* securely in a multi-user environment using the CUDA driver's fabric handle
* based APIs.
*
* An IMEX domain is either an OS instance or a group of securely
* connected OS instances using the NVIDIA IMEX daemon. The option must
* be set to the same value on each OS instance within the IMEX domain.
*
* An IMEX channel is a logical entity that is represented by a /dev node.
* The IMEX channels are global resources within the IMEX domain. When
* exporter and importer CUDA processes have been granted access to the
* same IMEX channel, they can securely share memory.
*
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
* the related CUDA APIs will fail with an insufficient permission error until
* the /dev nodes are set up. The creation of these /dev nodes,
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
* administrator, where N is the minor number. The major number can be
* queried from /proc/devices.
*
* nvidia-modprobe CLI support is available to set up the /dev nodes.
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Possible values:
* 0 - Disable IMEX using CUDA driver's fabric handles.
* N - N IMEX channels will be enabled in the driver to facilitate N
* concurrent users. The default value is 2048 channels, and the current
* maximum is 2^20, the same as the Linux dev_t minor number limit.
*/
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
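/*
 * Example (illustrative, not from this file): the channel count is a regular
 * module parameter, so a deployment could pin it via a modprobe configuration
 * file, e.g.
 *
 *     options nvidia NVreg_ImexChannelCount=4096
 *
 * with 0 disabling IMEX channels entirely.
 */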
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
/*
*----------------registry database definition----------------------
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
{NULL, NULL}
};

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -55,6 +55,7 @@
#include "nv-kthread-q.h"
#include "nv-pat.h"
#include "nv-dmabuf.h"
#include "nv-caps-imex.h"
#if !defined(CONFIG_RETPOLINE)
#include "nv-retpoline.h"
@ -825,11 +826,18 @@ static int __init nvidia_init_module(void)
goto procfs_exit;
}
rc = nv_caps_imex_init();
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize IMEX channels.\n");
goto caps_root_exit;
}
rc = nv_module_init(&sp);
if (rc < 0)
{
nv_printf(NV_DBG_ERRORS, "NVRM: failed to initialize module.\n");
goto caps_root_exit;
goto caps_imex_exit;
}
count = nvos_count_devices();
@ -941,6 +949,9 @@ drivers_exit:
module_exit:
nv_module_exit(sp);
caps_imex_exit:
nv_caps_imex_exit();
caps_root_exit:
nv_caps_root_exit();
@ -967,6 +978,8 @@ static void __exit nvidia_exit_module(void)
nv_module_exit(sp);
nv_caps_imex_exit();
nv_caps_root_exit();
nv_procfs_exit();
@ -2040,7 +2053,7 @@ nvidia_close_callback(
{
nv_linux_state_t *nvl;
nv_state_t *nv;
nvidia_stack_t *sp;
nvidia_stack_t *sp = nvlfp->sp;
NvBool bRemove = NV_FALSE;
nvl = nvlfp->nvptr;
@ -2052,12 +2065,11 @@ nvidia_close_callback(
*/
nv_free_file_private(nvlfp);
nv_kmem_cache_free_stack(nvlfp->sp);
nv_kmem_cache_free_stack(sp);
return;
}
nv = NV_STATE_PTR(nvl);
sp = nvlfp->sp;
rm_cleanup_file_private(sp, nv, &nvlfp->nvfp);
@ -6050,6 +6062,131 @@ failed:
return NV_ERR_NOT_SUPPORTED;
}
void NV_API_CALL nv_get_screen_info(
nv_state_t *nv,
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 *pFbSize
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = *pFbSize = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, registered_fb[i]->fix.smem_start))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs then fallback
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by nv_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if (NV_IS_CONSOLE_MAPPED(nv, physAddr))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
*pFbSize = (NvU64)(*pFbHeight) * (NvU64)(*pFbPitch);
}
}
#else
{
nv_linux_state_t *nvl = NV_GET_NVL_FROM_NV_STATE(nv);
struct pci_dev *pci_dev = nvl->pci_dev;
int i;
if (pci_dev == NULL)
return;
BUILD_BUG_ON(NV_GPU_BAR_INDEX_IMEM != NV_GPU_BAR_INDEX_FB + 1);
for (i = NV_GPU_BAR_INDEX_FB; i <= NV_GPU_BAR_INDEX_IMEM; i++)
{
int bar_index = nv_bar_index_to_os_bar_index(pci_dev, i);
struct resource *gpu_bar_res = &pci_dev->resource[bar_index];
struct resource *res = gpu_bar_res->child;
/*
* The console resource will become a child resource of the PCI device
* resource. Check if the child resource start address matches the expected
* console start address.
*/
if ((res != NULL) &&
NV_IS_CONSOLE_MAPPED(nv, res->start))
{
NvU32 res_name_len = strlen(res->name);
/*
* The resource name ends with 'fb' (efifb, vesafb, etc.).
* For simple-framebuffer, the resource name is 'BOOTFB'.
* Confirm that the resource name ends with either 'fb' or 'FB'.
*/
if ((res_name_len > 2) &&
!strcasecmp((res->name + res_name_len - 2), "fb"))
{
*pPhysicalAddress = res->start;
*pFbSize = resource_size(res);
return;
}
}
}
}
#endif
}
module_init(nvidia_init_module);
module_exit(nvidia_exit_module);

View File

@ -279,9 +279,11 @@ NV_STATUS nvGpuOpsPagingChannelPushStream(UvmGpuPagingChannel *channel,
char *methodStream,
NvU32 methodStreamSize);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(gpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo, NvBool bEnable);
NV_STATUS nvGpuOpsTogglePrefetchFaults(gpuFaultInfo *pFaultInfo,
NvBool bEnable);
// Interface used for CCSL

View File

@ -985,24 +985,30 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
}
EXPORT_SYMBOL(nvUvmInterfaceGetNonReplayableFaults);
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device)
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(UvmGpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, (gpuDeviceHandle)device);
status = rm_gpu_ops_flush_replayable_fault_buffer(sp,
pFaultInfo,
bCopyAndFlush);
nvUvmFreeSafeStack(sp);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceFlushReplayableFaultBuffer);
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo, NvBool bEnable)
NV_STATUS nvUvmInterfaceTogglePrefetchFaults(UvmGpuFaultInfo *pFaultInfo,
NvBool bEnable)
{
nvidia_stack_t *sp = nvUvmGetSafeStack();
NV_STATUS status;
status = rm_gpu_ops_toggle_prefetch_faults(sp, pFaultInfo, bEnable);
status = rm_gpu_ops_toggle_prefetch_faults(sp,
pFaultInfo,
bEnable);
nvUvmFreeSafeStack(sp);
return status;

View File

@ -30,18 +30,21 @@ NVIDIA_SOURCES += nvidia/nv-report-err.c
NVIDIA_SOURCES += nvidia/nv-rsync.c
NVIDIA_SOURCES += nvidia/nv-msi.c
NVIDIA_SOURCES += nvidia/nv-caps.c
NVIDIA_SOURCES += nvidia/nv-caps-imex.c
NVIDIA_SOURCES += nvidia/nv_uvm_interface.c
NVIDIA_SOURCES += nvidia/libspdm_aead.c
NVIDIA_SOURCES += nvidia/libspdm_ecc.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf.c
NVIDIA_SOURCES += nvidia/libspdm_rand.c
NVIDIA_SOURCES += nvidia/libspdm_shash.c
NVIDIA_SOURCES += nvidia/libspdm_rsa.c
NVIDIA_SOURCES += nvidia/libspdm_aead_aes_gcm.c
NVIDIA_SOURCES += nvidia/libspdm_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hmac_sha.c
NVIDIA_SOURCES += nvidia/libspdm_hkdf_sha.c
NVIDIA_SOURCES += nvidia/libspdm_ec.c
NVIDIA_SOURCES += nvidia/libspdm_x509.c
NVIDIA_SOURCES += nvidia/libspdm_rsa_ext.c
NVIDIA_SOURCES += nvidia/nvlink_linux.c
NVIDIA_SOURCES += nvidia/nvlink_caps.c
NVIDIA_SOURCES += nvidia/linux_nvswitch.c

View File

@ -25,6 +25,7 @@
#include "os-interface.h"
#include "nv-linux.h"
#include "nv-caps-imex.h"
#include "nv-time.h"
@ -59,6 +60,8 @@ NvBool os_dma_buf_enabled = NV_TRUE;
NvBool os_dma_buf_enabled = NV_FALSE;
#endif // CONFIG_DMA_SHARED_BUFFER
NvBool os_imex_channel_is_supported = NV_TRUE;
void NV_API_CALL os_disable_console_access(void)
{
console_lock();
@ -1231,90 +1234,6 @@ NvBool NV_API_CALL os_is_efi_enabled(void)
return efi_enabled(EFI_BOOT);
}
void NV_API_CALL os_get_screen_info(
NvU64 *pPhysicalAddress,
NvU32 *pFbWidth,
NvU32 *pFbHeight,
NvU32 *pFbDepth,
NvU32 *pFbPitch,
NvU64 consoleBar1Address,
NvU64 consoleBar2Address
)
{
*pPhysicalAddress = 0;
*pFbWidth = *pFbHeight = *pFbDepth = *pFbPitch = 0;
#if defined(CONFIG_FB) && defined(NV_NUM_REGISTERED_FB_PRESENT)
if (num_registered_fb > 0)
{
int i;
for (i = 0; i < num_registered_fb; i++)
{
if (!registered_fb[i])
continue;
/* Make sure base address is mapped to GPU BAR */
if ((registered_fb[i]->fix.smem_start == consoleBar1Address) ||
(registered_fb[i]->fix.smem_start == consoleBar2Address))
{
*pPhysicalAddress = registered_fb[i]->fix.smem_start;
*pFbWidth = registered_fb[i]->var.xres;
*pFbHeight = registered_fb[i]->var.yres;
*pFbDepth = registered_fb[i]->var.bits_per_pixel;
*pFbPitch = registered_fb[i]->fix.line_length;
return;
}
}
}
#endif
/*
* If the screen info is not found in the registered FBs then fallback
* to the screen_info structure.
*
* The SYSFB_SIMPLEFB option, if enabled, marks VGA/VBE/EFI framebuffers as
* generic framebuffers so the new generic system-framebuffer drivers can
* be used instead. DRM_SIMPLEDRM drives the generic system-framebuffers
* device created by SYSFB_SIMPLEFB.
*
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by os_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
* orig_video_isVGA is set to 1 during early Linux kernel
* initialization, and then will be set to a value, such as
* VIDEO_TYPE_VLFB or VIDEO_TYPE_EFI if an fbdev console is used.
*/
if (screen_info.orig_video_isVGA > 1)
{
NvU64 physAddr = screen_info.lfb_base;
#if defined(VIDEO_CAPABILITY_64BIT_BASE)
physAddr |= (NvU64)screen_info.ext_lfb_base << 32;
#endif
/* Make sure base address is mapped to GPU BAR */
if ((physAddr == consoleBar1Address) ||
(physAddr == consoleBar2Address))
{
*pPhysicalAddress = physAddr;
*pFbWidth = screen_info.lfb_width;
*pFbHeight = screen_info.lfb_height;
*pFbDepth = screen_info.lfb_depth;
*pFbPitch = screen_info.lfb_linelength;
}
}
#endif
}
void NV_API_CALL os_dump_stack(void)
{
dump_stack();
@ -2182,6 +2101,22 @@ void NV_API_CALL os_nv_cap_close_fd
nv_cap_close_fd(fd);
}
NvS32 NV_API_CALL os_imex_channel_count
(
void
)
{
return nv_caps_imex_channel_count();
}
NvS32 NV_API_CALL os_imex_channel_get
(
NvU64 descriptor
)
{
return nv_caps_imex_channel_get((int)descriptor);
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -87,59 +87,10 @@ static NV_STATUS get_io_ptes(struct vm_area_struct *vma,
return NV_OK;
}
/*!
* @brief Pins user IO pages that have been mapped to the user processes virtual
* address space with remap_pfn_range.
*
* @param[in] vma VMA that contains the virtual address range given by the
* start and the page count.
* @param[in] start Beginning of the virtual address range of the IO pages.
* @param[in] page_count Number of pages to pin from start.
* @param[in,out] page_array Storage array for pointers to the pinned pages.
* Must be large enough to contain at least page_count
* pointers.
*
* @return NV_OK if the pages were pinned successfully, error otherwise.
*/
static NV_STATUS get_io_pages(struct vm_area_struct *vma,
NvUPtr start,
NvU64 page_count,
struct page **page_array)
{
NV_STATUS rmStatus = NV_OK;
NvU64 i, pinned = 0;
unsigned long pfn;
for (i = 0; i < page_count; i++)
{
if ((nv_follow_pfn(vma, (start + (i * PAGE_SIZE)), &pfn) < 0) ||
(!pfn_valid(pfn)))
{
rmStatus = NV_ERR_INVALID_ADDRESS;
break;
}
// Page-backed memory mapped to userspace with remap_pfn_range
page_array[i] = pfn_to_page(pfn);
get_page(page_array[i]);
pinned++;
}
if (pinned < page_count)
{
for (i = 0; i < pinned; i++)
put_page(page_array[i]);
rmStatus = NV_ERR_INVALID_ADDRESS;
}
return rmStatus;
}
NV_STATUS NV_API_CALL os_lookup_user_io_memory(
void *address,
NvU64 page_count,
NvU64 **pte_array,
void **page_array
NvU64 **pte_array
)
{
NV_STATUS rmStatus;
@ -187,18 +138,9 @@ NV_STATUS NV_API_CALL os_lookup_user_io_memory(
goto done;
}
if (pfn_valid(pfn))
{
rmStatus = get_io_pages(vma, start, page_count, (struct page **)result_array);
if (rmStatus == NV_OK)
*page_array = (void *)result_array;
}
else
{
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
}
rmStatus = get_io_ptes(vma, start, page_count, (NvU64 **)result_array);
if (rmStatus == NV_OK)
*pte_array = (NvU64 *)result_array;
done:
nv_mmap_read_unlock(mm);

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r551_06
#define NV_BUILD_BRANCH r551_40
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r551_06
#define NV_PUBLIC_BRANCH r551_40
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_06-132"
#define NV_BUILD_CHANGELIST_NUM (33773930)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_40-170"
#define NV_BUILD_CHANGELIST_NUM (33933991)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_06-132"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_40-170"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33933991)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r551_06-14"
#define NV_BUILD_CHANGELIST_NUM (33773930)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "551.23"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33773930)
#define NV_BUILD_BRANCH_VERSION "r551_40-13"
#define NV_BUILD_CHANGELIST_NUM (33924744)
#define NV_BUILD_TYPE "Nightly"
#define NV_BUILD_NAME "r551_40-240221"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33921227)
#define NV_BUILD_BRANCH_BASE_VERSION R550
#endif
// End buildmeister python edited section

View File

@ -94,8 +94,9 @@ static inline void NvTimeSemFermiSetMaxSubmitted(
NvTimeSemFermiSetMaxSubmittedVal(&report->timer, value);
}
static inline NvU64 NvTimeSemFermiGetPayload(
NvReportSemaphore32 *report)
static inline NvU64 NvTimeSemFermiGetPayloadVal(
volatile void *payloadPtr,
volatile void *maxSubmittedPtr)
{
// The ordering of the two operations below is critical. Other threads
// may be submitting GPU work that modifies the semaphore value, or
@ -129,11 +130,11 @@ static inline NvU64 NvTimeSemFermiGetPayload(
// adjust the max submitted value back down if a wrap occurs between these
// two operations, but has no way to bump the max submitted value up if a
// wrap occurs with the opposite ordering.
NvU64 current = report->payload;
NvU64 current = *(volatile NvU32*)payloadPtr;
// Use an atomic exchange to ensure the 64-bit read is atomic even on 32-bit
// CPUs.
NvU64 submitted = (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)&report->timer, 0ll, 0ll);
__NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr, 0ll, 0ll);
nvAssert(!(current & 0xFFFFFFFF00000000ull));
@ -152,6 +153,12 @@ static inline NvU64 NvTimeSemFermiGetPayload(
return current;
}
static inline NvU64 NvTimeSemFermiGetPayload(
NvReportSemaphore32 *report)
{
return NvTimeSemFermiGetPayloadVal(&report->payload, &report->timer);
}
static inline void NvTimeSemFermiSetPayload(
NvReportSemaphore32 *report,
const NvU64 payload)
@ -167,12 +174,19 @@ static inline void NvTimeSemFermiSetPayload(
* Volta and up.
*/
static inline NvU64 NvTimeSemVoltaGetPayloadVal(
volatile void *payloadPtr)
{
nvAssert(payloadPtr);
return (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)payloadPtr,
0, 0);
}
static inline NvU64 NvTimeSemVoltaGetPayload(
NvReportSemaphore64 *report)
{
return (NvU64)
__NVatomicCompareExchange64((volatile NvS64 *)&report->reportValue,
0, 0);
return NvTimeSemVoltaGetPayloadVal(&report->reportValue);
}
static inline void NvTimeSemVoltaSetPayload(

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "550.40.07"
#define NV_VERSION_STRING "550.54.14"
#else

View File

@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef GA100_DEV_CTXSW_PROG_H
#define GA100_DEV_CTXSW_PROG_H
#define NV_CTXSW_TIMESTAMP_BUFFER_RD_WR_POINTER 30:0 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE 31:31 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_ENABLED 0x1 /* */
#define NV_CTXSW_TIMESTAMP_BUFFER_MAILBOX1_TRACE_FEATURE_DISABLED 0x0 /* */
#endif

View File

@ -123,9 +123,10 @@
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_FALSE 0x00000000 /* -WE-V */
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_TRIGGER_TRUE 0x00000001 /* -W--V */
#define NV_VIRTUAL_FUNCTION_PRIV_MMU_INVALIDATE_MAX_CACHELINE_SIZE 0x00000010 /* */
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_PRIV_DOORBELL 0x2200 /* -W-4R */
#define NV_VIRTUAL_FUNCTION_TIME_0 0x30080 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_0_NSEC 31:5 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_TIME_1 0x30084 /* R--4R */
#define NV_VIRTUAL_FUNCTION_TIME_1_NSEC 28:0 /* R-XUF */
#define NV_VIRTUAL_FUNCTION_PRIV_MAILBOX_SCRATCH(i) (0x2100+(i)*4) /* RW-4A */
#endif // __ga100_dev_vm_h__

View File

@ -991,7 +991,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
pInfo->cc_white_y |= (p->Chromaticity[1] & NVT_PVT_EDID_CC_WHITE_Y1_Y0_MASK) >> NVT_PVT_EDID_CC_WHITE_Y1_Y0_SHIFT;
// copy established timings
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
pInfo->established_timings_1_2 = (NvU16)p->bEstablishedTimings1 << 8;
pInfo->established_timings_1_2 |= (NvU16)p->bEstablishedTimings2;
// copy manuf reserved timings
@ -1039,7 +1039,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
p861Info = (k == 0) ? &pInfo->ext861 : &pInfo->ext861_2;
get861ExtInfo(pExt, sizeof(EDIDV1STRUC), p861Info);
// HF EEODB is present in EDID v1.3; v1.4 does not need this. Also, it is always present in the 1st CTA extension block.
if (j == 1 && pInfo->version == NVT_EDID_VER_1_3)
{
@ -1106,11 +1106,6 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
pInfo->ext_displayid20.interface_features.yuv420_min_pclk = 0;
}
if (pInfo->ext861.revision == 0 && pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
{
pInfo->ext861.revision = NVT_CEA861_REV_B;
}
if (pInfo->ext_displayid20.valid_data_blocks.interface_feature_present)
{
pInfo->ext861.basic_caps |= pInfo->ext_displayid20.basic_caps;
@ -1157,7 +1152,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
}
}
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
// Copy all the timings(could include type 7/8/9/10) from displayid20->timings[] to pEdidInfo->timings[]
for (i = 0; i < pInfo->ext_displayid20.total_timings; i++)
{
if (!assignNextAvailableTiming(pInfo, &(pInfo->ext_displayid20.timing[i])))
@ -1215,7 +1210,7 @@ NVT_STATUS NV_STDCALL NvTiming_ParseEDIDInfo(NvU8 *pEdid, NvU32 length, NVT_EDID
CODE_SEGMENT(PAGE_DD_CODE)
void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
{
{
NvU32 i, j, data;
for (i = 0; i < pInfo->total_timings; i++)
@ -1226,8 +1221,8 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
case NVT_TYPE_HDMI_STEREO:
case NVT_TYPE_HDMI_EXT:
// VTB timing use the base EDID (block 0) to determine the color format support
case NVT_TYPE_EDID_VTB_EXT:
case NVT_TYPE_EDID_VTB_EXT_STD:
case NVT_TYPE_EDID_VTB_EXT:
case NVT_TYPE_EDID_VTB_EXT_STD:
case NVT_TYPE_EDID_VTB_EXT_DTD:
case NVT_TYPE_EDID_VTB_EXT_CVT:
// pInfo->u.feature_ver_1_3.color_type provides mono, rgb, rgy, undefined
@ -1245,7 +1240,7 @@ void updateColorFormatAndBpcTiming(NVT_EDID_INFO *pInfo)
}
updateBpcForTiming(pInfo, i);
break;
default:
default:
// * the displayID_v1.3/v2.0 EDID extension need to follow the EDID bpc definition.
// * all other default to base edid
updateBpcForTiming(pInfo, i);
@ -1319,7 +1314,7 @@ NvBool isMatchedStandardTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
for (j = 0; j < pInfo->total_timings; j++)
{
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_STD &&
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
{
return NV_TRUE;
@ -1335,7 +1330,7 @@ NvBool isMatchedEstablishedTiming(NVT_EDID_INFO *pInfo, NVT_TIMING *pT)
for (j = 0; j < pInfo->total_timings; j++)
{
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
if (NVT_GET_TIMING_STATUS_TYPE(pInfo->timing[j].etc.status) == NVT_TYPE_EDID_EST &&
NvTiming_IsTimingRelaxedEqual(&pInfo->timing[j], pT))
{
return NV_TRUE;
@ -1405,7 +1400,7 @@ void updateBpcForTiming(NVT_EDID_INFO *pInfo, NvU32 index)
}
}
else if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_UNDEFINED) &&
p861Info->revision >= NVT_CEA861_REV_A)
{
@ -1462,7 +1457,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
// the timing mapping index :
//
// native_cta - the "native resoluiotn of the sink" in the CTA861.6 A Source shall override any other native video resolution indicators
// native_cta - the "native resoluiotn of the sink" in the CTA861.6 A Source shall override any other native video resolution indicators
// if the Source supports NVRDB and the NVRDB was found in the E-EDID
// preferred_cta - the "prefer SVD" in CTA-861-F (i.e. A Sink that prefers a Video Format that is not listed as an SVD in Video Data Block, but instead listed in YCBCR 4:2:0 VDB)
// preferred_displayid_dtd - the "prefer detailed timing of DispalyID" extension
@ -1546,7 +1541,7 @@ NVT_STATUS NvTiming_GetEdidTimingExWithPclk(NvU32 width, NvU32 height, NvU32 rr,
if (native_cta == pEdidInfo->total_timings && NVT_NATIVE_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
{
native_cta = i;
}
}
if (preferred_cta == pEdidInfo->total_timings && NVT_PREFERRED_TIMING_IS_CTA(pEdidTiming[i].etc.flag))
{
@ -2063,10 +2058,10 @@ NVT_STATUS NvTiming_GetEDIDBasedASPRTiming( NvU16 width, NvU16 height, NvU16 rr,
*
* @brief check EDID raw data is valid or not, and it will return the err flags if it existed
* @param pEdid : this is a pointer to EDID data
* @param length : read length of EDID
* @param length : read length of EDID
* @param bIsTrongValidation : true - added more check
* false- only header and checksum and size check
*
*
*/
CODE_SEGMENT(PAGE_DD_CODE)
NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongValidation)
@ -2086,12 +2081,12 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
return ret;
}
// check the EDID version and signature
// check the EDID version and signature
if (getEdidVersion(pEdid, &version) != NVT_STATUS_SUCCESS)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
return ret;
}
}
// check block 0 checksum value
if (!isChecksumValid(pEdid))
@ -2239,11 +2234,11 @@ NvU32 NvTiming_EDIDValidationMask(NvU8 *pEdid, NvU32 length, NvBool bIsStrongVal
/**
*
* @brief sanity check EDID binary frequently used data block is valid or not,
* @brief sanity check EDID binary frequently used data block is valid or not,
* and it will return error checkpoint flag if it existed
* @param pEdid : this is a pointer to EDID raw data
* @param length : read length of EDID
*
*
*/
CODE_SEGMENT(PAGE_DD_CODE)
NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
@ -2255,7 +2250,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
DETAILEDTIMINGDESCRIPTOR *pDTD;
// For CTA861
NvU8 ctaDTD_Offset;
NvU8 *pData_collection;
NvU8 *pData_collection;
NvU32 ctaBlockTag, ctaPayload, vic;
// For DisplayID
DIDEXTENSION *pDisplayid;
@ -2283,7 +2278,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_VERSION);
}
// 18bytes in DTD or Display Descriptor check
for (i = 0; i < NVT_EDID_MAX_LONG_DISPLAY_DESCRIPTOR; i++)
{
@ -2313,7 +2308,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
}
}
else
{
{
pLdd = (EDID_LONG_DISPLAY_DESCRIPTOR *)&p->DetailedTimingDesc[i];
// This block is a display descriptor, validate
@ -2327,7 +2322,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
NvU8 max_v_rate_offset, min_v_rate_offset, max_h_rate_offset, min_h_rate_offset;
// add 255Hz offsets as needed before doing the check, use descriptor->rsvd2
nvt_assert(!(pLdd->rsvd2 & 0xF0));
nvt_assert(!(pLdd->rsvd2 & 0xF0));
max_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MAX ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
min_v_rate_offset = pLdd->rsvd2 & NVT_PVT_EDID_RANGE_OFFSET_VER_MIN ? NVT_PVT_EDID_RANGE_OFFSET_AMOUNT : 0;
@ -2340,19 +2335,19 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
pRangeLimit->maxHRate == 0)
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_RANGE_LIMIT);
}
}
}
}
}
// extension and size check
if ((NvU32)(p->bExtensionFlag + 1) * sizeof(EDIDV1STRUC) > length)
{
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT);
}
// we shall not trust any extension blocks with wrong input EDID size
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
// we shall not trust any extension blocks with wrong input EDID size
if (NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_SIZE) ||
NVT_IS_EDID_VALIDATION_FLAGS(ret, NVT_EDID_VALIDATION_ERR_EXTENSION_COUNT))
return ret;
@ -2384,7 +2379,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
// validate SVD block
ctaBlockTag = NVT_CEA861_GET_SHORT_DESCRIPTOR_TAG(((EIA861EXTENSION *)pExt)->data[0]);
pData_collection = ((EIA861EXTENSION *)pExt)->data;
while ((ctaDTD_Offset - 4) > 0 && pData_collection != &pExt[ctaDTD_Offset] &&
ctaBlockTag > NVT_CEA861_TAG_RSVD && ctaBlockTag <= NVT_CEA861_TAG_EXTENDED_FLAG)
{
@ -2451,7 +2446,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DTD);
else
{
// check the max image size and
// check the max image size and
if (p->bMaxHorizImageSize != 0 && p->bMaxVertImageSize != 0)
{
NvU16 hDTDImageSize = (pDTD->bDTHorizVertImage & 0xF0) << 4 | pDTD->bDTHorizontalImage;
@ -2466,7 +2461,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if(!isChecksumValid(pExt))
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_CTA_CHECKSUM);
break;
break;
case NVT_EDID_EXTENSION_DISPLAYID:
pDisplayid = ((DIDEXTENSION *)pExt);
if (pDisplayid->ext_count != 0)
@ -2483,10 +2478,10 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
{
if ((pDisplayid->struct_version & 0xFF) == 0x21)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID_VERSION);
did2ExtCount++;
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
if (pDisplayid->use_case == 0 && did2ExtCount == 1)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_USE_CASE);
// check the DisplayId2 valid timing
@ -2506,7 +2501,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TYPE7);
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_RANGE_LIMITS)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
if (pDID2Header->type == DISPLAYID_2_0_BLOCK_TYPE_ADAPTIVE_SYNC)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_ADAPTIVE_SYNC);
@ -2527,9 +2522,9 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
}
// if the first tag failed, ignore all the tags afterward then
if (!bAllZero &&
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
if (!bAllZero &&
(pDID2Header->type < DISPLAYID_2_0_BLOCK_TYPE_PRODUCT_IDENTITY ||
(pDID2Header->type > DISPLAYID_2_0_BLOCK_TYPE_BRIGHTNESS_LUMINANCE_RANGE &&
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_VENDOR_SPEC &&
pDID2Header->type != DISPLAYID_2_0_BLOCK_TYPE_CTA_DATA)) &&
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
@ -2537,7 +2532,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID2_TAG);
continue;
}
}
}
else if ((pDisplayid->struct_version & 0xFF) == 0x12 || (pDisplayid->struct_version & 0xFF) == 0x13)
{
if ((pDisplayid->struct_version & 0xFF) == 0x13)
@ -2559,7 +2554,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if (pHeader->type == NVT_DISPLAYID_BLOCK_TYPE_RANGE_LIMITS)
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_RANGE_LIMIT);
// add more data blocks tag here to evaluate
}
pData_collection += block_length;
@ -2580,7 +2575,7 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
if (!bAllZero &&
pHeader->type > NVT_DISPLAYID_BLOCK_TYPE_TILEDDISPLAY &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_CTA_DATA &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
pHeader->type != NVT_DISPLAYID_BLOCK_TYPE_VENDOR_SPEC &&
(pData_collection - pExt < (int)sizeof(DIDEXTENSION)))
{
ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_DID13_TAG);
@ -2939,7 +2934,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);
// Wipe out the checksums
CommonEDIDBuffer[CommonEDIDBuffer[1]+5/*mandatory bytes*/-1] = 0;
CommonEDIDBuffer[0xFF] = 0;
@ -2954,7 +2949,7 @@ NvU32 NvTiming_CalculateCommonEDIDCRC32(NvU8* pEDIDBuffer, NvU32 edidVersion)
// displayId2 standalone uses 256 length sections
commonEDIDBufferSize = 256;
}
else
else
{
// Wipe out the Serial Number, Week of Manufacture, and Year of Manufacture or Model Year
NVMISC_MEMSET(CommonEDIDBuffer + 0x0C, 0, 6);

View File

@ -111,7 +111,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
nvt_assert((timingIdx) <= COUNT(pInfo->timing));
if ((pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_A_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->input.u.digital.video_interface == NVT_EDID_DIGITAL_VIDEO_INTERFACE_STANDARD_HDMI_B_SUPPORTED ||
pInfo->ext861.valid.H14B_VSDB || pInfo->ext861.valid.H20_HF_VSDB) && pInfo->ext861.revision >= NVT_CEA861_REV_A)
{
if (!pInfo->ext_displayid.supported_displayId2_0)
@ -153,7 +153,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_10b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_12b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_14b,
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
pDisplayIdInfo->u4.display_interface_features.rgb_depth.support_16b);
}
}
@ -174,7 +174,7 @@ void updateColorFormatForDisplayIdExtnTimings(NVT_EDID_INFO *pInfo,
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_14b,
pDisplayIdInfo->u4.display_interface.ycbcr422_depth.support_16b);
}
else
else
{
// yuv444
UPDATE_BPC_FOR_COLORFORMAT(pT->etc.yuv444, 0, /* yuv444 does not support 6bpc */
@ -264,7 +264,7 @@ static NVT_STATUS parseDisplayIdSection(DISPLAYID_SECTION * section,
* @brief Parses a displayID data block
* @param block The DisplayID data block to parse
* @param max_length The indicated total length of the each data block for checking
* @param pLength return the indicated length of the each data block
* @param pLength return the indicated length of the each data block
* @param pEdidInfo EDID struct containing DisplayID information and
* the timings or validation purpose if it is NULL
*/
@ -285,7 +285,7 @@ NVT_STATUS parseDisplayIdBlock(NvU8* pBlock,
return NVT_STATUS_ERR;
pInfo = pEdidInfo == NULL ? NULL : &pEdidInfo->ext_displayid;
*pLength = hdr->data_bytes + NVT_DISPLAYID_DATABLOCK_HEADER_LEN;
switch (hdr->type)
@ -386,9 +386,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
for (i = 0; i < prim_num; i++)
{
x_p = (blk->points)[i].color_x_bits_low +
x_p = (blk->points)[i].color_x_bits_low +
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
((blk->points)[i].color_y_bits_high << 4);
pInfo->primaries[i].x = x_p;
pInfo->primaries[i].y = y_p;
@ -396,9 +396,9 @@ static NVT_STATUS parseDisplayIdColorChar(NvU8 * block, NVT_DISPLAYID_INFO *pInf
for (j = 0; j < wp_num; j++)
{
x_p = (blk->points)[i].color_x_bits_low +
x_p = (blk->points)[i].color_x_bits_low +
(DRF_VAL(T_DISPLAYID, _COLOR, _POINT_X, (blk->points)[i].color_bits_mid) << 8);
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
y_p = DRF_VAL(T_DISPLAYID, _COLOR, _POINT_Y, (blk->points)[i].color_bits_mid) +
((blk->points)[i].color_y_bits_high << 4);
pInfo->white_points[pInfo->total_primaries + j].x = x_p;
pInfo->white_points[pInfo->total_primaries + j].y = y_p;
@ -508,7 +508,6 @@ static NVT_STATUS parseDisplayIdTiming1(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
CODE_SEGMENT(PAGE_DD_CODE)
static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR * type1, NVT_TIMING *pT)
{
NvU32 totalPixels_in_2_fields;
if (type1 == NULL || pT == NULL)
return NVT_STATUS_ERR;
@ -569,30 +568,17 @@ static NVT_STATUS parseDisplayIdTiming1Descriptor(DISPLAYID_TIMING_1_DESCRIPTOR
}
// the refresh rate
if (pT->interlaced)
{
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
// calculate the field rate in interlaced mode
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
}
else
{
// calculate frame rate in progressive mode
// in progressive mode filed = frame
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
}
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.name[39] = '\0';
pT->etc.rep = 0x1; // bit mask for no pixel repetition
pT->etc.status = NVT_STATUS_DISPLAYID_1;
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
// so we'll just update the preferred flag, not sequence them
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
pT->etc.flag |= type1->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
/* Fields currently not used. Uncomment them for future use
type1->options.stereo_support;
*/
@ -651,7 +637,6 @@ static NVT_STATUS parseDisplayIdTiming2(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
CODE_SEGMENT(PAGE_DD_CODE)
static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR * type2, NVT_TIMING *pT)
{
NvU32 totalPixels_in_2_fields;
if (type2 == NULL || pT == NULL)
return NVT_STATUS_ERR;
@ -679,32 +664,19 @@ static NVT_STATUS parseDisplayIdTiming2Descriptor(DISPLAYID_TIMING_2_DESCRIPTOR
pT->interlaced = type2->options.interface_frame_scanning_type;
// the refresh rate
if (pT->interlaced)
{
// in interlaced mode, adjust for one extra line in every other frame. pT->VTotal is field based here
totalPixels_in_2_fields = (NvU32)pT->HTotal * ((NvU32)pT->VTotal * 2 + 1);
// calculate the field rate in interlaced mode
pT->etc.rr = (NvU16)axb_div_c(pT->pclk * 2, 10000, totalPixels_in_2_fields);
pT->etc.rrx1k = axb_div_c(pT->pclk * 2, 10000000, totalPixels_in_2_fields);
}
else
{
// calculate frame rate in progressive mode
// in progressive mode field = frame
pT->etc.rr = (NvU16)axb_div_c(pT->pclk, 10000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
pT->etc.rrx1k = axb_div_c(pT->pclk, 10000000, (NvU32)pT->HTotal * (NvU32)pT->VTotal);
}
pT->etc.rr = NvTiming_CalcRR(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.rrx1k = NvTiming_CalcRRx1k(pT->pclk, pT->interlaced, pT->HTotal, pT->VTotal);
pT->etc.aspect = 0;
pT->etc.name[39] = '\0';
pT->etc.rep = 0x1; // Bit mask for no pixel repetition
pT->etc.status = NVT_STATUS_DISPLAYID_2;
// Unlike the PTM in EDID base block, DisplayID type I/II preferred timing does not have dependency on sequence
// so we'll just update the preferred flag, not sequence them
//pT->etc.status = NVT_STATUS_DISPLAYID_1N(1);
pT->etc.flag |= type2->options.is_preferred_detailed_timing ? NVT_FLAG_DISPLAYID_DTD_PREFERRED_TIMING : 0;
/* Fields currently not used. Uncomment them for future use
type2->options.stereo_support;
*/
@ -861,12 +833,12 @@ static NVT_STATUS parseDisplayIdTiming5Descriptor(DISPLAYID_TIMING_5_DESCRIPTOR
{
NvU32 width, height, rr;
NvBool is1000div1001 = NV_FALSE;
// we don't handle the stereo type or custom reduced blanking yet
//NvU8 stereoType, formula;
//stereoType = (desc->optns & NVT_DISPLAYID_TIMING_5_STEREO_SUPPORT_MASK);
//formula = desc->optns & NVT_DISPLAYID_TIMING_5_FORMULA_SUPPORT_MASK;
if (desc->optns & NVT_DISPLAYID_TIMING_5_FRACTIONAL_RR_SUPPORT_MASK)
{
is1000div1001 = NV_TRUE;
@ -892,7 +864,7 @@ static NVT_STATUS parseDisplayIdTiming5(NvU8 * block, NVT_EDID_INFO *pEdidInfo)
for (i = 0; i * sizeof(DISPLAYID_TIMING_5_DESCRIPTOR) < blk->header.data_bytes; i++)
{
NVMISC_MEMSET(&newTiming, 0, sizeof(newTiming));
if (parseDisplayIdTiming5Descriptor(blk->descriptors + i, &newTiming) == NVT_STATUS_SUCCESS)
{
if (pEdidInfo == NULL) continue;
@ -1030,7 +1002,7 @@ static NVT_STATUS parseDisplayIdRangeLimits(NvU8 * block, NVT_DISPLAYID_INFO *pI
rl = pInfo->range_limits + pInfo->rl_num;
(pInfo->rl_num)++;
rl->pclk_min = minPclk;
rl->pclk_max = maxPclk;
@ -1105,7 +1077,7 @@ static NVT_STATUS parseDisplayIdDeviceData(NvU8 * block, NVT_DISPLAYID_INFO *pIn
pInfo->device_op_mode = DRF_VAL(T_DISPLAYID, _DEVICE, _OPERATING_MODE, blk->operating_mode);
pInfo->support_backlight = DRF_VAL(T_DISPLAYID, _DEVICE, _BACKLIGHT, blk->operating_mode);
pInfo->support_intensity = DRF_VAL(T_DISPLAYID, _DEVICE, _INTENSITY, blk->operating_mode);
pInfo->horiz_pixel_count = blk->horizontal_pixel_count;
pInfo->vert_pixel_count = blk->vertical_pixel_count;
@ -1278,7 +1250,7 @@ static NVT_STATUS parseDisplayIdStereo(NvU8 * block, NVT_DISPLAYID_INFO *pInfo)
nvt_assert(0);
return NVT_STATUS_ERR;
}
return NVT_STATUS_SUCCESS;
}
@ -1322,7 +1294,7 @@ static NVT_STATUS parseDisplayIdTiledDisplay(NvU8 * block, NVT_DISPLAYID_INFO *p
pInfo->bezel_info.left = (blk->bezel_info.left * blk->bezel_info.pixel_density) / 10;
pInfo->tile_topology_id.vendor_id = (blk->topology_id.vendor_id[2] << 16) |
(blk->topology_id.vendor_id[1] << 8 ) |
(blk->topology_id.vendor_id[1] << 8 ) |
blk->topology_id.vendor_id[0];
pInfo->tile_topology_id.product_id = (blk->topology_id.product_id[1] << 8) | blk->topology_id.product_id[0];
@ -1350,7 +1322,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
if (pInfo == NULL) return NVT_STATUS_SUCCESS;
p861info = &pInfo->ext861;
pInfo->ext_displayid.cea_data_block_present = 1;
p861info->revision = blk->revision;
@ -1366,7 +1338,7 @@ static NVT_STATUS parseDisplayIdCtaData(NvU8 * block, NVT_EDID_INFO *pInfo)
//parse HDR related information from the HDR static metadata data block
parseCea861HdrStaticMetadataDataBlock(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
// base video
parse861bShortTiming(p861info, pInfo, FROM_DISPLAYID_13_DATA_BLOCK);
// yuv420-only video
@ -1422,7 +1394,7 @@ static NVT_STATUS parseDisplayIdDisplayInterfaceFeatures(NvU8 * block, NVT_DISPL
// Minimum Pixel Rate at Which YCbCr 4:2:0 Encoding Is Supported
pInfo->u4.display_interface_features.minimum_pixel_rate_ycbcr420 = blk->minimum_pixel_rate_ycbcr420;
// Audio capability
pInfo->u4.display_interface_features.audio_capability.support_32khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_32KHZ, blk->supported_audio_capability);
pInfo->u4.display_interface_features.audio_capability.support_44_1khz = DRF_VAL(T_DISPLAYID, _INTERFACE_FEATURES, _AUDIO_SUPPORTED_44_1KHZ, blk->supported_audio_capability);

View File

@ -235,7 +235,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
if (totalPixelsIn2Fields != 0)
{
rr = (NvU16)axb_div_c(pclk * 2, 10000, totalPixelsIn2Fields);
rr = (NvU16)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000, (NvU64)totalPixelsIn2Fields);
}
}
else
@ -244,7 +244,7 @@ NvU16 NvTiming_CalcRR(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTotal)
if (totalPixels != 0)
{
rr = (NvU16)axb_div_c(pclk, 10000, totalPixels);
rr = (NvU16)axb_div_c_64((NvU64)pclk, (NvU64)10000, (NvU64)totalPixels);
}
}
return rr;
@ -261,7 +261,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
if (totalPixelsIn2Fields != 0)
{
rrx1k = (NvU32)axb_div_c(pclk * 2, 10000000, totalPixelsIn2Fields);
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk * 2, (NvU64)10000000, (NvU64)totalPixelsIn2Fields);
}
}
else
@ -270,7 +270,7 @@ NvU32 NvTiming_CalcRRx1k(NvU32 pclk, NvU16 interlaced, NvU16 HTotal, NvU16 VTota
if (totalPixels != 0)
{
rrx1k = (NvU32)axb_div_c(pclk, 10000000, totalPixels);
rrx1k = (NvU32)axb_div_c_64((NvU64)pclk, (NvU64)10000000, (NvU64)totalPixels);
}
}
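The hunks above widen the refresh-rate arithmetic to 64 bits. A minimal sketch of the idea with a worked value follows; the helper name and the absence of rounding are illustrative assumptions, not the driver's actual axb_div_c_64.

#include <stdint.h>

/* Sketch only: compute (a * b) / c with a 64-bit intermediate so the product cannot wrap. */
static uint64_t axb_div_c_64_sketch(uint64_t a, uint64_t b, uint64_t c)
{
    return (a * b) / c;   /* caller guarantees c != 0 */
}

/*
 * Why the widening matters: the surrounding math implies pclk is kept in
 * 10 kHz units, so a ~1.33 GHz pixel clock is pclk = 133000, and the
 * intermediate 133000 * 10000000 = 1.33e12 is far above the 32-bit limit
 * of about 4.29e9. Doing the multiply in 64 bits keeps it from truncating.
 */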

View File

@ -70,8 +70,8 @@ extern "C" {
// Link Transition Timeouts in milliseconds
#define NVLINK_TRANSITION_OFF_TIMEOUT 1
#define NVLINK_TRANSITION_SAFE_TIMEOUT 300
#define NVLINK_TRANSITION_HS_TIMEOUT 8000
#define NVLINK_TRANSITION_SAFE_TIMEOUT 70
#define NVLINK_TRANSITION_HS_TIMEOUT 7000
#define NVLINK_TRANSITION_ACTIVE_PENDING 2000
#define NVLINK_TRANSITION_POST_HS_TIMEOUT 70

View File

@ -222,8 +222,7 @@ _cci_module_cable_detect
}
default:
{
NVSWITCH_ASSERT(0);
break;
return -NVL_ERR_NOT_SUPPORTED;
}
}
@ -348,8 +347,9 @@ _cci_module_identify
// Mark as faulty
device->pCci->isFaulty[moduleId] = NV_TRUE;
NVSWITCH_PRINT(device, ERROR,
"%s: Module HW check failed. Module %d\n", __FUNCTION__, moduleId);
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d faulty\n", moduleId);
return -NVL_ERR_GENERIC;
}
@ -612,6 +612,9 @@ _cci_module_identify_async
NvlStatus retval;
PCCI pCci = device->pCci;
CCI_MODULE_ONBOARD_STATE nextState;
CCI_MODULE_STATE *pOnboardState;
pOnboardState = &device->pCci->moduleState[moduleId];
nvswitch_os_memset(&nextState, 0, sizeof(CCI_MODULE_ONBOARD_STATE));
@ -637,8 +640,9 @@ _cci_module_identify_async
}
default:
{
// Not expected
NVSWITCH_ASSERT(0);
// Invalid cable type
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
break;
}
@ -646,6 +650,8 @@ _cci_module_identify_async
}
else
{
pOnboardState->onboardError.bOnboardFailure = NV_TRUE;
pOnboardState->onboardError.failedOnboardState = pOnboardState->currOnboardState;
nextState.onboardPhase = CCI_ONBOARD_PHASE_CHECK_CONDITION;
}

View File

@ -7727,11 +7727,11 @@ nvswitch_ctrl_get_err_info_lr10
}
// TODO NVidia TL not supported
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_ERRLOG_REG");
NVSWITCH_PRINT(device, WARN,
NVSWITCH_PRINT(device, NOISY,
"%s WARNING: Nvidia %s register %s does not exist!\n",
__FUNCTION__, "NVLTL", "NV_NVLTL_TL_INTEN_REG");

View File

@ -1638,6 +1638,9 @@ nvswitch_cci_module_access_cmd_ls10
// Mark as faulty
device->pCci->isFaulty[osfp] = NV_TRUE;
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_CCI_MODULE,
"Module %d access error\n", osfp);
return -NVL_IO_ERROR;
}

View File

@ -5549,6 +5549,29 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
}
}
static void
_nvswitch_dump_minion_ali_debug_registers_ls10
(
nvswitch_device *device,
NvU32 link
)
{
NVSWITCH_MINION_ALI_DEBUG_REGISTERS params;
nvlink_link *nvlink = nvswitch_get_link(device, link);
if ((nvlink != NULL) &&
(nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, &params) == NVL_SUCCESS))
{
NVSWITCH_PRINT(device, ERROR,
"%s: Minion error on link #%d!:\n"
"Minion DLSTAT MN00 = 0x%x\n"
"Minion DLSTAT UC01 = 0x%x\n"
"Minion DLSTAT UC01 = 0x%x\n",
__FUNCTION__, link,
params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr);
}
}
static void
_nvswitch_emit_link_errors_minion_fatal_ls10
(
@ -5611,6 +5634,8 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
enabledLinks &= ~bit;
regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData);
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void
@ -5647,8 +5672,8 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
{
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
break;
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
break;
@ -5660,6 +5685,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
break;
}
_nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
static void

View File

@ -42,6 +42,11 @@
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
#include "ls10/minion_nvlink_defines_public_ls10.h"
#define NV_NVLINK_TLREQ_TIMEOUT_ACTIVE 10000
#define NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN 10
#define NV_NVLINK_TLREQ_TIMEOUT_RESET 4
#define NV_NVLINK_TLREQ_TIMEOUT_L2 5
static void
_nvswitch_configure_reserved_throughput_counters
(
@ -143,9 +148,9 @@ nvswitch_init_lpwr_regs_ls10
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: Failed to set L1 Threshold\n",
__FUNCTION__);
__FUNCTION__);
}
}
}
void
nvswitch_corelib_training_complete_ls10
@ -1433,7 +1438,7 @@ nvswitch_load_link_disable_settings_ls10
nvswitch_device *device,
nvlink_link *link
)
{
{
NvU32 regVal;
// Read state from NVLIPT HW
@ -1443,7 +1448,7 @@ nvswitch_load_link_disable_settings_ls10
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
{
NVSWITCH_ASSERT(!cciIsLinkManaged(device, link->linkNumber));
// Set link to invalid and unregister from corelib
device->link[link->linkNumber].valid = NV_FALSE;
nvlink_lib_unregister_link(link);
@ -1589,7 +1594,7 @@ nvswitch_reset_and_train_link_ls10
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
(link_intr_subcode == MINION_ALARM_BUSY))
(link_intr_subcode == MINION_ALARM_BUSY))
{
status = nvswitch_request_tl_link_state_ls10(link,
@ -1683,6 +1688,39 @@ nvswitch_are_link_clocks_on_ls10
return NV_TRUE;
}
static
NvlStatus
_nvswitch_tl_request_get_timeout_value_ls10
(
nvswitch_device *device,
NvU32 tlLinkState,
NvU32 *timeoutVal
)
{
switch (tlLinkState)
{
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_ACTIVE:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_ACTIVE;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_RESET;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_SHUTDOWN;
break;
case NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_L2:
*timeoutVal = NV_NVLINK_TLREQ_TIMEOUT_L2;
break;
default:
NVSWITCH_PRINT(device, ERROR,
"%s: Invalid tlLinkState %d provided!\n",
__FUNCTION__, tlLinkState);
return NVL_BAD_ARGS;
}
return NVL_SUCCESS;
}
NvlStatus
nvswitch_request_tl_link_state_ls10
(
@ -1696,6 +1734,9 @@ nvswitch_request_tl_link_state_ls10
NvU32 linkStatus;
NvU32 lnkErrStatus;
NvU32 bit;
NvU32 timeoutVal;
NVSWITCH_TIMEOUT timeout;
NvBool keepPolling;
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
{
@ -1729,17 +1770,43 @@ nvswitch_request_tl_link_state_ls10
if (bSync)
{
// Wait for the TL link state register to complete
status = nvswitch_wait_for_tl_request_ready_lr10(link);
// setup timeouts for the TL request
status = _nvswitch_tl_request_get_timeout_value_ls10(device, tlLinkState, &timeoutVal);
if (status != NVL_SUCCESS)
{
return status;
return NVL_ERR_INVALID_STATE;
}
nvswitch_timeout_create(NVSWITCH_INTERVAL_1MSEC_IN_NS * timeoutVal, &timeout);
status = NVL_MORE_PROCESSING_REQUIRED;
do
{
keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) ==
tlLinkState)
{
status = NVL_SUCCESS;
break;
}
nvswitch_os_sleep(1);
}
while(keepPolling);
// Do one final check if the polling loop didn't see the target linkState
if (status == NVL_MORE_PROCESSING_REQUIRED)
{
// Check for state requested
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
tlLinkState)
{
@ -1750,6 +1817,8 @@ nvswitch_request_tl_link_state_ls10
}
}
}
return status;
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -36,7 +36,36 @@ extern "C" {
#define RUSD_TIMESTAMP_WRITE_IN_PROGRESS (NV_U64_MAX)
#define RUSD_TIMESTAMP_INVALID 0
#define RUSD_SEQ_DATA_VALID(x) ((((NvU32)(x)) & 0x1U) == 0)
// seq = c_0 * b_0 + c_1 * (b_0 - 1) where c_0 == open_count and c_1 == close_count
// When they are equal, data is valid, otherwise data is being written.
// b_0 == 1 mod (b_0 - 1) and b_0 - 1 == (-1) mod b_0
// So, c_0 == seq mod (b_0 - 1) and c_1 == (-1 * seq) mod b_0
// c_1 cannot be calculated quite so naively because negative modulos aren't fun, so we
// instead do c_1 == (b_0 - (seq mod b_0)) mod b_0
//
#define RUSD_SEQ_BASE_SHIFT 20llu
#define RUSD_SEQ_BASE0 (1llu << RUSD_SEQ_BASE_SHIFT)
#define RUSD_SEQ_BASE1 (RUSD_SEQ_BASE0 - 1llu)
#define RUSD_SEQ_COEFF1(x) ((RUSD_SEQ_BASE0 - ((x) % RUSD_SEQ_BASE0)) % RUSD_SEQ_BASE0)
#define RUSD_SEQ_COEFF0(x) ((x) % RUSD_SEQ_BASE1)
#define RUSD_SEQ_WRAP_SHIFT 18llu
#define RUSD_SEQ_WRAP_VAL (1llu << RUSD_SEQ_WRAP_SHIFT)
#define RUSD_SEQ_DATA_VALID(x) (RUSD_SEQ_COEFF0(x) == RUSD_SEQ_COEFF1(x))
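A quick worked check of the coefficient recovery described in the comment above, using illustrative counts only (this standalone program is not part of the header):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    const uint64_t b0 = 1ull << 20;                  /* RUSD_SEQ_BASE0          */
    const uint64_t open_count = 3, close_count = 3;  /* equal => data is valid  */
    const uint64_t seq = open_count * b0 + close_count * (b0 - 1);

    assert(seq % (b0 - 1) == open_count);            /* RUSD_SEQ_COEFF0(seq)    */
    assert((b0 - (seq % b0)) % b0 == close_count);   /* RUSD_SEQ_COEFF1(seq)    */
    return 0;
}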
//
// Helper macros to check seq before reading RUSD.
// Not wrapped in do/while because the macros rely on continue/break in the caller's loop
//
#define RUSD_SEQ_CHECK1(SHARED_DATA) \
NvU64 seq = (SHARED_DATA)->seq; \
portAtomicMemoryFenceLoad(); \
if (!RUSD_SEQ_DATA_VALID(seq)) \
continue;
#define RUSD_SEQ_CHECK2(SHARED_DATA) \
portAtomicMemoryFenceLoad(); \
if (seq == (SHARED_DATA)->seq) \
break;
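A minimal sketch of the reader pattern these helpers imply; the function name, the retry loop, and the choice of field are assumptions for illustration, and a production caller would likely bound the retries rather than spin forever:

static NvU64 rusdReadTotalPmaMemory(volatile NV00DE_SHARED_DATA *pSharedData)
{
    NvU64 totalPmaMemory = 0;

    while (NV_TRUE)
    {
        RUSD_SEQ_CHECK1(pSharedData);   // retry while a write is in flight

        totalPmaMemory = pSharedData->totalPmaMemory;

        RUSD_SEQ_CHECK2(pSharedData);   // accept the snapshot if seq is unchanged
    }

    return totalPmaMemory;
}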
enum {
RUSD_CLK_PUBLIC_DOMAIN_GRAPHICS = 0,
@ -166,10 +195,12 @@ typedef struct RUSD_INST_POWER_USAGE {
} RUSD_INST_POWER_USAGE;
typedef struct NV00DE_SHARED_DATA {
volatile NvU32 seq;
volatile NvU64 seq;
NvU32 bar1Size;
NvU32 bar1AvailSize;
NvU64 totalPmaMemory;
NvU64 freePmaMemory;
// GSP polling data section
NV_DECLARE_ALIGNED(RUSD_CLK_PUBLIC_DOMAIN_INFOS clkPublicDomainInfos, 8);

View File

@ -853,7 +853,8 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
} NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS;
/* vGPU capabilities */
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
#define NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU 0
#define NVA081_CTRL_VGPU_CAPABILITY_COMPUTE_MEDIA_ENGINE_GPU 1
/*
* NVA081_CTRL_CMD_VGPU_SET_CAPABILITY
@ -872,7 +873,7 @@ typedef struct NVA081_CTRL_PGPU_GET_VGPU_STREAMING_CAPABILITY_PARAMS {
* NV_ERR_OBJECT_NOT_FOUND
* NV_ERR_INVALID_ARGUMENT
*/
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_CMD_VGPU_SET_CAPABILITY (0xa081011e) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS_MESSAGE_ID (0x1eU)
@ -881,4 +882,30 @@ typedef struct NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS {
NvBool state;
} NVA081_CTRL_VGPU_SET_CAPABILITY_PARAMS;
/*
* NVA081_CTRL_CMD_VGPU_GET_CAPABILITY
*
* This command is to get state of vGPU capability for the physical GPU.
*
* capability [IN]
* This param specifies the requested capability of the device that is to be queried
* One of NVA081_CTRL_VGPU_CAPABILITY* values
*
* state [OUT]
* This param specifies the state of the capability
*
* Possible status values returned are:
* NV_OK
* NV_ERR_OBJECT_NOT_FOUND
* NV_ERR_INVALID_ARGUMENT
*/
#define NVA081_CTRL_CMD_VGPU_GET_CAPABILITY (0xa081011f) /* finn: Evaluated from "(FINN_NVA081_VGPU_CONFIG_VGPU_CONFIG_INTERFACE_ID << 8) | NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID" */
#define NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS_MESSAGE_ID (0x1fU)
typedef struct NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS {
NvU32 capability;
NvBool state;
} NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS;
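A hedged caller-side sketch of the new GET control; the client/object handles and the pRmApi->Control entry point are placeholders for whatever the caller already uses and are not defined by this header:

NVA081_CTRL_VGPU_GET_CAPABILITY_PARAMS params = { 0 };
NV_STATUS status;

params.capability = NVA081_CTRL_VGPU_CAPABILITY_MINI_QUARTER_GPU;

status = pRmApi->Control(pRmApi, hClient, hVgpuConfigObject,
                         NVA081_CTRL_CMD_VGPU_GET_CAPABILITY,
                         &params, sizeof(params));
if (status == NV_OK)
{
    // params.state now reports whether the capability is enabled on this pGPU
}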
/* _ctrlA081vgpuconfig_h_ */

View File

@ -44,151 +44,6 @@ ENTRY(0x13BD, 0x11D6, 0x10de, "GRID M10-8A"),
ENTRY(0x13BD, 0x1286, 0x10de, "GRID M10-2B"),
ENTRY(0x13BD, 0x12EE, 0x10de, "GRID M10-2B4"),
ENTRY(0x13BD, 0x1339, 0x10de, "GRID M10-1B4"),
ENTRY(0x13F2, 0x114C, 0x10de, "GRID M60-0Q"),
ENTRY(0x13F2, 0x114D, 0x10de, "GRID M60-1Q"),
ENTRY(0x13F2, 0x114E, 0x10de, "GRID M60-2Q"),
ENTRY(0x13F2, 0x114F, 0x10de, "GRID M60-4Q"),
ENTRY(0x13F2, 0x1150, 0x10de, "GRID M60-8Q"),
ENTRY(0x13F2, 0x1176, 0x10de, "GRID M60-0B"),
ENTRY(0x13F2, 0x1177, 0x10de, "GRID M60-1B"),
ENTRY(0x13F2, 0x117D, 0x10de, "GRID M60-2B"),
ENTRY(0x13F2, 0x11AE, 0x10de, "GRID M60-1A"),
ENTRY(0x13F2, 0x11AF, 0x10de, "GRID M60-2A"),
ENTRY(0x13F2, 0x11B0, 0x10de, "GRID M60-4A"),
ENTRY(0x13F2, 0x11B1, 0x10de, "GRID M60-8A"),
ENTRY(0x13F2, 0x12EC, 0x10de, "GRID M60-2B4"),
ENTRY(0x13F2, 0x1337, 0x10de, "GRID M60-1B4"),
ENTRY(0x13F3, 0x117C, 0x10de, "GRID M6-2B"),
ENTRY(0x13F3, 0x117E, 0x10de, "GRID M6-0B"),
ENTRY(0x13F3, 0x117F, 0x10de, "GRID M6-1B"),
ENTRY(0x13F3, 0x1180, 0x10de, "GRID M6-0Q"),
ENTRY(0x13F3, 0x1181, 0x10de, "GRID M6-1Q"),
ENTRY(0x13F3, 0x1182, 0x10de, "GRID M6-2Q"),
ENTRY(0x13F3, 0x1183, 0x10de, "GRID M6-4Q"),
ENTRY(0x13F3, 0x1184, 0x10de, "GRID M6-8Q"),
ENTRY(0x13F3, 0x11AA, 0x10de, "GRID M6-1A"),
ENTRY(0x13F3, 0x11AB, 0x10de, "GRID M6-2A"),
ENTRY(0x13F3, 0x11AC, 0x10de, "GRID M6-4A"),
ENTRY(0x13F3, 0x11AD, 0x10de, "GRID M6-8A"),
ENTRY(0x13F3, 0x12ED, 0x10de, "GRID M6-2B4"),
ENTRY(0x13F3, 0x1338, 0x10de, "GRID M6-1B4"),
ENTRY(0x15F7, 0x1265, 0x10de, "GRID P100C-1B"),
ENTRY(0x15F7, 0x1266, 0x10de, "GRID P100C-1Q"),
ENTRY(0x15F7, 0x1267, 0x10de, "GRID P100C-2Q"),
ENTRY(0x15F7, 0x1268, 0x10de, "GRID P100C-4Q"),
ENTRY(0x15F7, 0x1269, 0x10de, "GRID P100C-6Q"),
ENTRY(0x15F7, 0x126A, 0x10de, "GRID P100C-12Q"),
ENTRY(0x15F7, 0x126B, 0x10de, "GRID P100C-1A"),
ENTRY(0x15F7, 0x126C, 0x10de, "GRID P100C-2A"),
ENTRY(0x15F7, 0x126D, 0x10de, "GRID P100C-4A"),
ENTRY(0x15F7, 0x126E, 0x10de, "GRID P100C-6A"),
ENTRY(0x15F7, 0x126F, 0x10de, "GRID P100C-12A"),
ENTRY(0x15F7, 0x128D, 0x10de, "GRID P100C-2B"),
ENTRY(0x15F7, 0x12F4, 0x10de, "GRID P100C-2B4"),
ENTRY(0x15F7, 0x133F, 0x10de, "GRID P100C-1B4"),
ENTRY(0x15F7, 0x137D, 0x10de, "GRID P100C-12C"),
ENTRY(0x15F7, 0x138C, 0x10de, "GRID P100C-4C"),
ENTRY(0x15F7, 0x138D, 0x10de, "GRID P100C-6C"),
ENTRY(0x15F8, 0x1221, 0x10de, "GRID P100-1B"),
ENTRY(0x15F8, 0x1222, 0x10de, "GRID P100-1Q"),
ENTRY(0x15F8, 0x1223, 0x10de, "GRID P100-2Q"),
ENTRY(0x15F8, 0x1224, 0x10de, "GRID P100-4Q"),
ENTRY(0x15F8, 0x1225, 0x10de, "GRID P100-8Q"),
ENTRY(0x15F8, 0x1226, 0x10de, "GRID P100-16Q"),
ENTRY(0x15F8, 0x1227, 0x10de, "GRID P100-1A"),
ENTRY(0x15F8, 0x1228, 0x10de, "GRID P100-2A"),
ENTRY(0x15F8, 0x1229, 0x10de, "GRID P100-4A"),
ENTRY(0x15F8, 0x122A, 0x10de, "GRID P100-8A"),
ENTRY(0x15F8, 0x122B, 0x10de, "GRID P100-16A"),
ENTRY(0x15F8, 0x128C, 0x10de, "GRID P100-2B"),
ENTRY(0x15F8, 0x12F2, 0x10de, "GRID P100-2B4"),
ENTRY(0x15F8, 0x133D, 0x10de, "GRID P100-1B4"),
ENTRY(0x15F8, 0x137C, 0x10de, "GRID P100-16C"),
ENTRY(0x15F8, 0x138A, 0x10de, "GRID P100-4C"),
ENTRY(0x15F8, 0x138B, 0x10de, "GRID P100-8C"),
ENTRY(0x15F9, 0x122C, 0x10de, "GRID P100X-1B"),
ENTRY(0x15F9, 0x122D, 0x10de, "GRID P100X-1Q"),
ENTRY(0x15F9, 0x122E, 0x10de, "GRID P100X-2Q"),
ENTRY(0x15F9, 0x122F, 0x10de, "GRID P100X-4Q"),
ENTRY(0x15F9, 0x1230, 0x10de, "GRID P100X-8Q"),
ENTRY(0x15F9, 0x1231, 0x10de, "GRID P100X-16Q"),
ENTRY(0x15F9, 0x1232, 0x10de, "GRID P100X-1A"),
ENTRY(0x15F9, 0x1233, 0x10de, "GRID P100X-2A"),
ENTRY(0x15F9, 0x1234, 0x10de, "GRID P100X-4A"),
ENTRY(0x15F9, 0x1235, 0x10de, "GRID P100X-8A"),
ENTRY(0x15F9, 0x1236, 0x10de, "GRID P100X-16A"),
ENTRY(0x15F9, 0x128B, 0x10de, "GRID P100X-2B"),
ENTRY(0x15F9, 0x12F3, 0x10de, "GRID P100X-2B4"),
ENTRY(0x15F9, 0x133E, 0x10de, "GRID P100X-1B4"),
ENTRY(0x15F9, 0x137B, 0x10de, "GRID P100X-16C"),
ENTRY(0x15F9, 0x1388, 0x10de, "GRID P100X-4C"),
ENTRY(0x15F9, 0x1389, 0x10de, "GRID P100X-8C"),
ENTRY(0x1B38, 0x11E7, 0x10de, "GRID P40-1B"),
ENTRY(0x1B38, 0x11E8, 0x10de, "GRID P40-1Q"),
ENTRY(0x1B38, 0x11E9, 0x10de, "GRID P40-2Q"),
ENTRY(0x1B38, 0x11EA, 0x10de, "GRID P40-3Q"),
ENTRY(0x1B38, 0x11EB, 0x10de, "GRID P40-4Q"),
ENTRY(0x1B38, 0x11EC, 0x10de, "GRID P40-6Q"),
ENTRY(0x1B38, 0x11ED, 0x10de, "GRID P40-8Q"),
ENTRY(0x1B38, 0x11EE, 0x10de, "GRID P40-12Q"),
ENTRY(0x1B38, 0x11EF, 0x10de, "GRID P40-24Q"),
ENTRY(0x1B38, 0x11F0, 0x10de, "GRID P40-1A"),
ENTRY(0x1B38, 0x11F1, 0x10de, "GRID P40-2A"),
ENTRY(0x1B38, 0x11F2, 0x10de, "GRID P40-3A"),
ENTRY(0x1B38, 0x11F3, 0x10de, "GRID P40-4A"),
ENTRY(0x1B38, 0x11F4, 0x10de, "GRID P40-6A"),
ENTRY(0x1B38, 0x11F5, 0x10de, "GRID P40-8A"),
ENTRY(0x1B38, 0x11F6, 0x10de, "GRID P40-12A"),
ENTRY(0x1B38, 0x11F7, 0x10de, "GRID P40-24A"),
ENTRY(0x1B38, 0x1287, 0x10de, "GRID P40-2B"),
ENTRY(0x1B38, 0x12B1, 0x10de, "GeForce GTX P40-24"),
ENTRY(0x1B38, 0x12B2, 0x10de, "GeForce GTX P40-12"),
ENTRY(0x1B38, 0x12B3, 0x10de, "GeForce GTX P40-6"),
ENTRY(0x1B38, 0x12EF, 0x10de, "GRID P40-2B4"),
ENTRY(0x1B38, 0x133A, 0x10de, "GRID P40-1B4"),
ENTRY(0x1B38, 0x137E, 0x10de, "GRID P40-24C"),
ENTRY(0x1B38, 0x1381, 0x10de, "GRID P40-4C"),
ENTRY(0x1B38, 0x1382, 0x10de, "GRID P40-6C"),
ENTRY(0x1B38, 0x1383, 0x10de, "GRID P40-8C"),
ENTRY(0x1B38, 0x1384, 0x10de, "GRID P40-12C"),
ENTRY(0x1B38, 0x13B0, 0x10de, "GRID GTX P40-6"),
ENTRY(0x1B38, 0x13B1, 0x10de, "GRID GTX P40-12"),
ENTRY(0x1B38, 0x13B2, 0x10de, "GRID GTX P40-24"),
ENTRY(0x1B38, 0x13D0, 0x10de, "GRID GTX P40-8"),
ENTRY(0x1BB3, 0x1203, 0x10de, "GRID P4-1B"),
ENTRY(0x1BB3, 0x1204, 0x10de, "GRID P4-1Q"),
ENTRY(0x1BB3, 0x1205, 0x10de, "GRID P4-2Q"),
ENTRY(0x1BB3, 0x1206, 0x10de, "GRID P4-4Q"),
ENTRY(0x1BB3, 0x1207, 0x10de, "GRID P4-8Q"),
ENTRY(0x1BB3, 0x1208, 0x10de, "GRID P4-1A"),
ENTRY(0x1BB3, 0x1209, 0x10de, "GRID P4-2A"),
ENTRY(0x1BB3, 0x120A, 0x10de, "GRID P4-4A"),
ENTRY(0x1BB3, 0x120B, 0x10de, "GRID P4-8A"),
ENTRY(0x1BB3, 0x1288, 0x10de, "GRID P4-2B"),
ENTRY(0x1BB3, 0x12F1, 0x10de, "GRID P4-2B4"),
ENTRY(0x1BB3, 0x133C, 0x10de, "GRID P4-1B4"),
ENTRY(0x1BB3, 0x136D, 0x10de, "GRID GTX P4-2"),
ENTRY(0x1BB3, 0x136E, 0x10de, "GRID GTX P4-4"),
ENTRY(0x1BB3, 0x136F, 0x10de, "GRID GTX P4-8"),
ENTRY(0x1BB3, 0x1380, 0x10de, "GRID P4-8C"),
ENTRY(0x1BB3, 0x1385, 0x10de, "GRID P4-4C"),
ENTRY(0x1BB4, 0x11F8, 0x10de, "GRID P6-1B"),
ENTRY(0x1BB4, 0x11F9, 0x10de, "GRID P6-1Q"),
ENTRY(0x1BB4, 0x11FA, 0x10de, "GRID P6-2Q"),
ENTRY(0x1BB4, 0x11FB, 0x10de, "GRID P6-4Q"),
ENTRY(0x1BB4, 0x11FC, 0x10de, "GRID P6-8Q"),
ENTRY(0x1BB4, 0x11FD, 0x10de, "GRID P6-16Q"),
ENTRY(0x1BB4, 0x11FE, 0x10de, "GRID P6-1A"),
ENTRY(0x1BB4, 0x11FF, 0x10de, "GRID P6-2A"),
ENTRY(0x1BB4, 0x1200, 0x10de, "GRID P6-4A"),
ENTRY(0x1BB4, 0x1201, 0x10de, "GRID P6-8A"),
ENTRY(0x1BB4, 0x1202, 0x10de, "GRID P6-16A"),
ENTRY(0x1BB4, 0x1289, 0x10de, "GRID P6-2B"),
ENTRY(0x1BB4, 0x12F0, 0x10de, "GRID P6-2B4"),
ENTRY(0x1BB4, 0x133B, 0x10de, "GRID P6-1B4"),
ENTRY(0x1BB4, 0x137F, 0x10de, "GRID P6-16C"),
ENTRY(0x1BB4, 0x1386, 0x10de, "GRID P6-4C"),
ENTRY(0x1BB4, 0x1387, 0x10de, "GRID P6-8C"),
ENTRY(0x1DB1, 0x1259, 0x10de, "GRID V100X-1B"),
ENTRY(0x1DB1, 0x125A, 0x10de, "GRID V100X-1Q"),
ENTRY(0x1DB1, 0x125B, 0x10de, "GRID V100X-2Q"),
@ -813,6 +668,20 @@ ENTRY(0x2324, 0x18E0, 0x10de, "NVIDIA H800XM-16C"),
ENTRY(0x2324, 0x18E1, 0x10de, "NVIDIA H800XM-20C"),
ENTRY(0x2324, 0x18E2, 0x10de, "NVIDIA H800XM-40C"),
ENTRY(0x2324, 0x18E3, 0x10de, "NVIDIA H800XM-80C"),
ENTRY(0x2329, 0x2028, 0x10de, "NVIDIA H20-1-12CME"),
ENTRY(0x2329, 0x2029, 0x10de, "NVIDIA H20-1-12C"),
ENTRY(0x2329, 0x202A, 0x10de, "NVIDIA H20-1-24C"),
ENTRY(0x2329, 0x202B, 0x10de, "NVIDIA H20-2-24C"),
ENTRY(0x2329, 0x202C, 0x10de, "NVIDIA H20-3-48C"),
ENTRY(0x2329, 0x202D, 0x10de, "NVIDIA H20-4-48C"),
ENTRY(0x2329, 0x202E, 0x10de, "NVIDIA H20-7-96C"),
ENTRY(0x2329, 0x202F, 0x10de, "NVIDIA H20-4C"),
ENTRY(0x2329, 0x2030, 0x10de, "NVIDIA H20-6C"),
ENTRY(0x2329, 0x2031, 0x10de, "NVIDIA H20-12C"),
ENTRY(0x2329, 0x2032, 0x10de, "NVIDIA H20-16C"),
ENTRY(0x2329, 0x2033, 0x10de, "NVIDIA H20-24C"),
ENTRY(0x2329, 0x2034, 0x10de, "NVIDIA H20-48C"),
ENTRY(0x2329, 0x2035, 0x10de, "NVIDIA H20-96C"),
ENTRY(0x2330, 0x187A, 0x10de, "NVIDIA H100XM-1-10CME"),
ENTRY(0x2330, 0x187B, 0x10de, "NVIDIA H100XM-1-10C"),
ENTRY(0x2330, 0x187C, 0x10de, "NVIDIA H100XM-1-20C"),
@ -883,14 +752,14 @@ ENTRY(0x233A, 0x186B, 0x10de, "NVIDIA H800L-15C"),
ENTRY(0x233A, 0x186C, 0x10de, "NVIDIA H800L-23C"),
ENTRY(0x233A, 0x186D, 0x10de, "NVIDIA H800L-47C"),
ENTRY(0x233A, 0x186E, 0x10de, "NVIDIA H800L-94C"),
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA H100GL-1-12CME"),
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA H100GL-1-12C"),
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA H100GL-1-24C"),
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA H100GL-2-24C"),
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA H100GL-3-48C"),
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA H100GL-4-48C"),
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA H100GL-7-96C"),
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA H100GL-96C"),
ENTRY(0x2342, 0x18C2, 0x10de, "NVIDIA GH200-1-12CME"),
ENTRY(0x2342, 0x18C3, 0x10de, "NVIDIA GH200-1-12C"),
ENTRY(0x2342, 0x18C4, 0x10de, "NVIDIA GH200-1-24C"),
ENTRY(0x2342, 0x18C5, 0x10de, "NVIDIA GH200-2-24C"),
ENTRY(0x2342, 0x18C6, 0x10de, "NVIDIA GH200-3-48C"),
ENTRY(0x2342, 0x18C7, 0x10de, "NVIDIA GH200-4-48C"),
ENTRY(0x2342, 0x18C8, 0x10de, "NVIDIA GH200-7-96C"),
ENTRY(0x2342, 0x18C9, 0x10de, "NVIDIA GH200-96C"),
ENTRY(0x25B6, 0x159D, 0x10de, "NVIDIA A16-1B"),
ENTRY(0x25B6, 0x159E, 0x10de, "NVIDIA A16-2B"),
ENTRY(0x25B6, 0x159F, 0x10de, "NVIDIA A16-1Q"),
@ -987,6 +856,45 @@ ENTRY(0x26B2, 0x1835, 0x10de, "NVIDIA RTX5000-Ada-4C"),
ENTRY(0x26B2, 0x1836, 0x10de, "NVIDIA RTX5000-Ada-8C"),
ENTRY(0x26B2, 0x1837, 0x10de, "NVIDIA RTX5000-Ada-16C"),
ENTRY(0x26B2, 0x1838, 0x10de, "NVIDIA RTX5000-Ada-32C"),
ENTRY(0x26B3, 0x1958, 0x10de, "NVIDIA RTX 5880-Ada-1B"),
ENTRY(0x26B3, 0x1959, 0x10de, "NVIDIA RTX 5880-Ada-2B"),
ENTRY(0x26B3, 0x195A, 0x10de, "NVIDIA RTX 5880-Ada-1Q"),
ENTRY(0x26B3, 0x195B, 0x10de, "NVIDIA RTX 5880-Ada-2Q"),
ENTRY(0x26B3, 0x195C, 0x10de, "NVIDIA RTX 5880-Ada-3Q"),
ENTRY(0x26B3, 0x195D, 0x10de, "NVIDIA RTX 5880-Ada-4Q"),
ENTRY(0x26B3, 0x195E, 0x10de, "NVIDIA RTX 5880-Ada-6Q"),
ENTRY(0x26B3, 0x195F, 0x10de, "NVIDIA RTX 5880-Ada-8Q"),
ENTRY(0x26B3, 0x1960, 0x10de, "NVIDIA RTX 5880-Ada-12Q"),
ENTRY(0x26B3, 0x1961, 0x10de, "NVIDIA RTX 5880-Ada-16Q"),
ENTRY(0x26B3, 0x1962, 0x10de, "NVIDIA RTX 5880-Ada-24Q"),
ENTRY(0x26B3, 0x1963, 0x10de, "NVIDIA RTX 5880-Ada-48Q"),
ENTRY(0x26B3, 0x1964, 0x10de, "NVIDIA RTX 5880-Ada-1A"),
ENTRY(0x26B3, 0x1965, 0x10de, "NVIDIA RTX 5880-Ada-2A"),
ENTRY(0x26B3, 0x1966, 0x10de, "NVIDIA RTX 5880-Ada-3A"),
ENTRY(0x26B3, 0x1967, 0x10de, "NVIDIA RTX 5880-Ada-4A"),
ENTRY(0x26B3, 0x1968, 0x10de, "NVIDIA RTX 5880-Ada-6A"),
ENTRY(0x26B3, 0x1969, 0x10de, "NVIDIA RTX 5880-Ada-8A"),
ENTRY(0x26B3, 0x196A, 0x10de, "NVIDIA RTX 5880-Ada-12A"),
ENTRY(0x26B3, 0x196B, 0x10de, "NVIDIA RTX 5880-Ada-16A"),
ENTRY(0x26B3, 0x196C, 0x10de, "NVIDIA RTX 5880-Ada-24A"),
ENTRY(0x26B3, 0x196D, 0x10de, "NVIDIA RTX 5880-Ada-48A"),
ENTRY(0x26B3, 0x196E, 0x10de, "NVIDIA RTX 5880-Ada-1"),
ENTRY(0x26B3, 0x196F, 0x10de, "NVIDIA RTX 5880-Ada-2"),
ENTRY(0x26B3, 0x1970, 0x10de, "NVIDIA RTX 5880-Ada-3"),
ENTRY(0x26B3, 0x1971, 0x10de, "NVIDIA RTX 5880-Ada-4"),
ENTRY(0x26B3, 0x1972, 0x10de, "NVIDIA RTX 5880-Ada-6"),
ENTRY(0x26B3, 0x1973, 0x10de, "NVIDIA RTX 5880-Ada-8"),
ENTRY(0x26B3, 0x1974, 0x10de, "NVIDIA RTX 5880-Ada-12"),
ENTRY(0x26B3, 0x1975, 0x10de, "NVIDIA RTX 5880-Ada-16"),
ENTRY(0x26B3, 0x1976, 0x10de, "NVIDIA RTX 5880-Ada-24"),
ENTRY(0x26B3, 0x1977, 0x10de, "NVIDIA RTX 5880-Ada-48"),
ENTRY(0x26B3, 0x1978, 0x10de, "NVIDIA RTX 5880-Ada-4C"),
ENTRY(0x26B3, 0x1979, 0x10de, "NVIDIA RTX 5880-Ada-6C"),
ENTRY(0x26B3, 0x197A, 0x10de, "NVIDIA RTX 5880-Ada-8C"),
ENTRY(0x26B3, 0x197B, 0x10de, "NVIDIA RTX 5880-Ada-12C"),
ENTRY(0x26B3, 0x197C, 0x10de, "NVIDIA RTX 5880-Ada-16C"),
ENTRY(0x26B3, 0x197D, 0x10de, "NVIDIA RTX 5880-Ada-24C"),
ENTRY(0x26B3, 0x197E, 0x10de, "NVIDIA RTX 5880-Ada-48C"),
ENTRY(0x26B5, 0x176D, 0x10de, "NVIDIA L40-1B"),
ENTRY(0x26B5, 0x176E, 0x10de, "NVIDIA L40-2B"),
ENTRY(0x26B5, 0x176F, 0x10de, "NVIDIA L40-1Q"),
@ -1102,6 +1010,78 @@ ENTRY(0x26B9, 0x18AE, 0x10de, "NVIDIA L40S-12C"),
ENTRY(0x26B9, 0x18AF, 0x10de, "NVIDIA L40S-16C"),
ENTRY(0x26B9, 0x18B0, 0x10de, "NVIDIA L40S-24C"),
ENTRY(0x26B9, 0x18B1, 0x10de, "NVIDIA L40S-48C"),
ENTRY(0x26BA, 0x1909, 0x10de, "NVIDIA L20-1B"),
ENTRY(0x26BA, 0x190A, 0x10de, "NVIDIA L20-2B"),
ENTRY(0x26BA, 0x190B, 0x10de, "NVIDIA L20-1Q"),
ENTRY(0x26BA, 0x190C, 0x10de, "NVIDIA L20-2Q"),
ENTRY(0x26BA, 0x190D, 0x10de, "NVIDIA L20-3Q"),
ENTRY(0x26BA, 0x190E, 0x10de, "NVIDIA L20-4Q"),
ENTRY(0x26BA, 0x190F, 0x10de, "NVIDIA L20-6Q"),
ENTRY(0x26BA, 0x1910, 0x10de, "NVIDIA L20-8Q"),
ENTRY(0x26BA, 0x1911, 0x10de, "NVIDIA L20-12Q"),
ENTRY(0x26BA, 0x1912, 0x10de, "NVIDIA L20-16Q"),
ENTRY(0x26BA, 0x1913, 0x10de, "NVIDIA L20-24Q"),
ENTRY(0x26BA, 0x1914, 0x10de, "NVIDIA L20-48Q"),
ENTRY(0x26BA, 0x1915, 0x10de, "NVIDIA L20-1A"),
ENTRY(0x26BA, 0x1916, 0x10de, "NVIDIA L20-2A"),
ENTRY(0x26BA, 0x1917, 0x10de, "NVIDIA L20-3A"),
ENTRY(0x26BA, 0x1918, 0x10de, "NVIDIA L20-4A"),
ENTRY(0x26BA, 0x1919, 0x10de, "NVIDIA L20-6A"),
ENTRY(0x26BA, 0x191A, 0x10de, "NVIDIA L20-8A"),
ENTRY(0x26BA, 0x191B, 0x10de, "NVIDIA L20-12A"),
ENTRY(0x26BA, 0x191C, 0x10de, "NVIDIA L20-16A"),
ENTRY(0x26BA, 0x191D, 0x10de, "NVIDIA L20-24A"),
ENTRY(0x26BA, 0x191E, 0x10de, "NVIDIA L20-48A"),
ENTRY(0x26BA, 0x191F, 0x10de, "NVIDIA GeForce RTX 3050"),
ENTRY(0x26BA, 0x1920, 0x10de, "NVIDIA GeForce RTX 3060"),
ENTRY(0x26BA, 0x1921, 0x10de, "NVIDIA L20-1"),
ENTRY(0x26BA, 0x1922, 0x10de, "NVIDIA L20-2"),
ENTRY(0x26BA, 0x1923, 0x10de, "NVIDIA L20-3"),
ENTRY(0x26BA, 0x1924, 0x10de, "NVIDIA L20-4"),
ENTRY(0x26BA, 0x1925, 0x10de, "NVIDIA L20-6"),
ENTRY(0x26BA, 0x1926, 0x10de, "NVIDIA L20-8"),
ENTRY(0x26BA, 0x1927, 0x10de, "NVIDIA L20-12"),
ENTRY(0x26BA, 0x1928, 0x10de, "NVIDIA L20-16"),
ENTRY(0x26BA, 0x1929, 0x10de, "NVIDIA L20-24"),
ENTRY(0x26BA, 0x192A, 0x10de, "NVIDIA L20-48"),
ENTRY(0x26BA, 0x192B, 0x10de, "NVIDIA L20-4C"),
ENTRY(0x26BA, 0x192C, 0x10de, "NVIDIA L20-6C"),
ENTRY(0x26BA, 0x192D, 0x10de, "NVIDIA L20-8C"),
ENTRY(0x26BA, 0x192E, 0x10de, "NVIDIA L20-12C"),
ENTRY(0x26BA, 0x192F, 0x10de, "NVIDIA L20-16C"),
ENTRY(0x26BA, 0x1930, 0x10de, "NVIDIA L20-24C"),
ENTRY(0x26BA, 0x1931, 0x10de, "NVIDIA L20-48C"),
ENTRY(0x27B6, 0x1938, 0x10de, "NVIDIA L2-1B"),
ENTRY(0x27B6, 0x1939, 0x10de, "NVIDIA L2-2B"),
ENTRY(0x27B6, 0x193A, 0x10de, "NVIDIA L2-1Q"),
ENTRY(0x27B6, 0x193B, 0x10de, "NVIDIA L2-2Q"),
ENTRY(0x27B6, 0x193C, 0x10de, "NVIDIA L2-3Q"),
ENTRY(0x27B6, 0x193D, 0x10de, "NVIDIA L2-4Q"),
ENTRY(0x27B6, 0x193E, 0x10de, "NVIDIA L2-6Q"),
ENTRY(0x27B6, 0x193F, 0x10de, "NVIDIA L2-8Q"),
ENTRY(0x27B6, 0x1940, 0x10de, "NVIDIA L2-12Q"),
ENTRY(0x27B6, 0x1941, 0x10de, "NVIDIA L2-24Q"),
ENTRY(0x27B6, 0x1942, 0x10de, "NVIDIA L2-1A"),
ENTRY(0x27B6, 0x1943, 0x10de, "NVIDIA L2-2A"),
ENTRY(0x27B6, 0x1944, 0x10de, "NVIDIA L2-3A"),
ENTRY(0x27B6, 0x1945, 0x10de, "NVIDIA L2-4A"),
ENTRY(0x27B6, 0x1946, 0x10de, "NVIDIA L2-6A"),
ENTRY(0x27B6, 0x1947, 0x10de, "NVIDIA L2-8A"),
ENTRY(0x27B6, 0x1948, 0x10de, "NVIDIA L2-12A"),
ENTRY(0x27B6, 0x1949, 0x10de, "NVIDIA L2-24A"),
ENTRY(0x27B6, 0x194A, 0x10de, "NVIDIA L2-1"),
ENTRY(0x27B6, 0x194B, 0x10de, "NVIDIA L2-2"),
ENTRY(0x27B6, 0x194C, 0x10de, "NVIDIA L2-3"),
ENTRY(0x27B6, 0x194D, 0x10de, "NVIDIA L2-4"),
ENTRY(0x27B6, 0x194E, 0x10de, "NVIDIA L2-6"),
ENTRY(0x27B6, 0x194F, 0x10de, "NVIDIA L2-8"),
ENTRY(0x27B6, 0x1950, 0x10de, "NVIDIA L2-12"),
ENTRY(0x27B6, 0x1951, 0x10de, "NVIDIA L2-24"),
ENTRY(0x27B6, 0x1952, 0x10de, "NVIDIA L2-4C"),
ENTRY(0x27B6, 0x1953, 0x10de, "NVIDIA L2-6C"),
ENTRY(0x27B6, 0x1954, 0x10de, "NVIDIA L2-8C"),
ENTRY(0x27B6, 0x1955, 0x10de, "NVIDIA L2-12C"),
ENTRY(0x27B6, 0x1956, 0x10de, "NVIDIA L2-24C"),
ENTRY(0x27B8, 0x172F, 0x10de, "NVIDIA L4-1B"),
ENTRY(0x27B8, 0x1730, 0x10de, "NVIDIA L4-2B"),
ENTRY(0x27B8, 0x1731, 0x10de, "NVIDIA L4-1Q"),

View File

@ -18,9 +18,9 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
{ 0x20FD, 0x17F8, 0x20F5, 0x0 },
{ 0x2324, 0x17A8, 0x2324, 0x17A6 },
{ 0x2329, 0x198C, 0x2329, 0x198B },
{ 0x2330, 0x16C0, 0x2330, 0x16C1 },
{ 0x2336, 0x16C2, 0x2330, 0x16C1 },
{ 0x2342, 0x1809, 0x2342, 0x1805 },
};
for (NvU32 i = 0; i < (sizeof(vgpu_aliases) / sizeof(struct vgpu_alias_details)); ++i) {
@ -136,6 +136,13 @@ static const struct {
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1280}, // NVIDIA H800XM-3-40C
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1281}, // NVIDIA H800XM-4-40C
{0x232410DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1282}, // NVIDIA H800XM-7-80C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1397}, // NVIDIA H20-1-12CME
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1398}, // NVIDIA H20-1-12C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1399}, // NVIDIA H20-1-24C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1400}, // NVIDIA H20-2-24C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1401}, // NVIDIA H20-3-48C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1402}, // NVIDIA H20-4-48C
{0x232910DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1403}, // NVIDIA H20-7-96C
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
{0x233610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1130}, // NVIDIA H100XM-1-10CME
{0x233010DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1131}, // NVIDIA H100XM-1-10C
@ -178,13 +185,13 @@ static const struct {
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1079}, // NVIDIA H800L-3-47C
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1080}, // NVIDIA H800L-4-47C
{0x233A10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1081}, // NVIDIA H800L-7-94C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA H100GL-1-12CME
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA H100GL-1-12C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA H100GL-1-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA H100GL-2-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA H100GL-3-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA H100GL-4-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA H100GL-7-96C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1196}, // NVIDIA GH200-1-12CME
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1197}, // NVIDIA GH200-1-12C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1198}, // NVIDIA GH200-1-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1199}, // NVIDIA GH200-2-24C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1200}, // NVIDIA GH200-3-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1201}, // NVIDIA GH200-4-48C
{0x234210DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1202}, // NVIDIA GH200-7-96C
};
#endif // GENERATE_vgpuSmcTypeIdMappings

View File

@ -2397,17 +2397,19 @@ NvBool nvHdmiFrlQueryConfig(
NvU8 *pHdmiFrlBpc,
NVDscInfoEvoRec *pDscInfo)
{
// Try first with 10 BPC
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
pModeTimings,
pHwTimings,
b2Heads1Or,
pValidationParams,
HDMI_BPC10,
pConfig,
pHdmiFrlBpc,
pDscInfo)) {
return TRUE;
if (nvDpyIsHdmiDepth30Evo(pDpyEvo)) {
// Try first with 10 BPC
if (nvHdmiFrlQueryConfigOneBpc(pDpyEvo,
pModeTimings,
pHwTimings,
b2Heads1Or,
pValidationParams,
HDMI_BPC10,
pConfig,
pHdmiFrlBpc,
pDscInfo)) {
return TRUE;
}
}
// Try again with 8 BPC

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -223,7 +223,8 @@ typedef struct
#define GSP_FW_HEAP_FREE_LIST_MAGIC 0x4845415046524545ULL
#define GSP_FW_FLAGS 8:0
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
#define GSP_FW_FLAGS 8:0
#define GSP_FW_FLAGS_CLOCK_BOOST NVBIT(0)
#define GSP_FW_FLAGS_RECOVERY_MARGIN_PRESENT NVBIT(1)
#endif // GSP_FW_WPR_META_H_

View File

@ -0,0 +1,160 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*!
* @file rmspdmrsakeys.h
* @brief SPDM Interfaces - RSA keys
*/
#ifndef _RM_SPDM_RSA_KEYS_H_
#define _RM_SPDM_RSA_KEYS_H_
//
// Note !!
// All key components must be stored in big-endian (network) order.
// If any of these key components change, the GSP/RM code must be rebuilt as well.
//
#ifndef USE_MBEDTLS
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
{
0xcd, 0x65, 0xd2, 0xca, 0x74, 0xd1, 0x76, 0x76, 0x9f, 0x18, 0x23, 0x64, 0x55, 0x84, 0x98, 0x60,
0x71, 0xeb, 0x42, 0x6d, 0xa4, 0x98, 0xf9, 0x92, 0xea, 0x83, 0x5b, 0x9b, 0xe2, 0x66, 0x8b, 0x43,
0x14, 0x4d, 0x5a, 0xb8, 0xdb, 0x68, 0x2b, 0xfa, 0x5f, 0xdf, 0x7f, 0xf4, 0xfd, 0x5e, 0x42, 0x34,
0x09, 0x98, 0xa1, 0x21, 0x98, 0x4c, 0x8d, 0xbc, 0x99, 0xdb, 0xea, 0xc1, 0xe3, 0x42, 0xe6, 0x67,
0x26, 0x86, 0x2c, 0xd0, 0xdb, 0xf3, 0x9c, 0x12, 0xad, 0xb3, 0x82, 0x93, 0x9c, 0xb9, 0xae, 0x98,
0x82, 0xeb, 0x59, 0xb6, 0x5c, 0x09, 0x9f, 0xa6, 0x15, 0x30, 0xa0, 0xc6, 0x77, 0xd5, 0xae, 0xa2,
0x91, 0x65, 0x24, 0xc3, 0x7d, 0x9b, 0xa4, 0x2c, 0x31, 0x73, 0x41, 0x26, 0x72, 0xe7, 0x2f, 0xb8,
0x60, 0xd8, 0xce, 0xb8, 0xd8, 0x4b, 0x90, 0x6c, 0xa3, 0x19, 0x7e, 0x2b, 0xd5, 0xf6, 0x05, 0x8a,
0x2b, 0xb9, 0x9e, 0x27, 0xba, 0x2e, 0x16, 0x81, 0x9a, 0x9e, 0xf5, 0x6c, 0x38, 0x0a, 0x01, 0xea,
0xd6, 0xe1, 0xa4, 0x83, 0x76, 0xd0, 0x68, 0xbb, 0x90, 0x63, 0xce, 0x1c, 0x8c, 0x6f, 0x0f, 0x6b,
0x65, 0x5b, 0x12, 0xe2, 0x92, 0x60, 0x79, 0x88, 0x99, 0x5e, 0x17, 0x89, 0x4d, 0x54, 0xb1, 0x87,
0x9c, 0xe6, 0x6d, 0x3c, 0x56, 0x1c, 0x3a, 0x3e, 0xb9, 0x16, 0x2c, 0xc8, 0xda, 0x1a, 0xfb, 0x5c,
0xd7, 0x16, 0x3c, 0x74, 0xda, 0x78, 0xf1, 0x53, 0xd7, 0x66, 0x97, 0x8a, 0x57, 0x0b, 0x86, 0x57,
0x2c, 0x45, 0xa1, 0x5e, 0xae, 0x39, 0x39, 0xde, 0xe0, 0x40, 0x6f, 0xdb, 0x4c, 0xd8, 0xc1, 0x8c,
0xdb, 0xce, 0xea, 0x05, 0xc0, 0xbc, 0x89, 0x72, 0x15, 0xfd, 0xbf, 0xb2, 0xb7, 0xf2, 0x5d, 0x05,
0xdd, 0x2b, 0x53, 0xa4, 0x03, 0x1e, 0x11, 0x67, 0xa7, 0x0f, 0x87, 0xfb, 0x57, 0x48, 0x91, 0xc8,
0x02, 0xb7, 0x46, 0x3d, 0x82, 0xcd, 0x06, 0x4a, 0x79, 0x0f, 0xa5, 0x8a, 0xac, 0xfb, 0xb7, 0xc2,
0xf0, 0x95, 0x19, 0x4c, 0x78, 0x7a, 0xc7, 0xd7, 0x70, 0xee, 0x6e, 0x59, 0xaf, 0x51, 0x9b, 0x11,
0x03, 0xd3, 0x56, 0xb3, 0x05, 0x5a, 0xbb, 0x1f, 0xbd, 0xc2, 0x0e, 0x89, 0x77, 0xb4, 0xc1, 0x02,
0xf9, 0x97, 0x56, 0x07, 0x2e, 0x4e, 0x2d, 0x01, 0x73, 0x89, 0x7d, 0xf3, 0xc9, 0x8c, 0x88, 0x2c,
0x79, 0xd9, 0x47, 0x34, 0x9e, 0x32, 0x51, 0xd4, 0xa6, 0x7e, 0xd1, 0x08, 0xda, 0xc0, 0x76, 0x24,
0x8e, 0x25, 0x73, 0x14, 0x30, 0xd2, 0x17, 0x37, 0xbc, 0xe0, 0x3e, 0xa2, 0x47, 0xff, 0xe2, 0x4e,
0x9b, 0x31, 0x6c, 0xe6, 0x54, 0xaf, 0x62, 0x3a, 0xcd, 0xfa, 0x2f, 0xaf, 0x73, 0x2e, 0x73, 0x4a,
0x3a, 0x60, 0xa8, 0xa9, 0xfc, 0x77, 0xb2, 0x57, 0xdd, 0x3a, 0xfa, 0xce, 0x35, 0xc3, 0xea, 0xa9
};
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x00, 0x01, 0x00, 0x01};
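As a worked illustration of the big-endian ordering called out in the note above, the four exponent bytes {0x00, 0x01, 0x00, 0x01} decode to 0x00010001 = 65537, the conventional RSA public exponent; the helper below is ours and not part of this header:

static inline NvU32 be4_to_u32(const NvU8 b[4])
{
    return ((NvU32)b[0] << 24) | ((NvU32)b[1] << 16) |
           ((NvU32)b[2] << 8)  |  (NvU32)b[3];
}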
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
{
0x04, 0x85, 0xc0, 0x6d, 0x6a, 0xc3, 0x0d, 0xeb, 0xb0, 0xb7, 0x14, 0x58, 0x6a, 0x35, 0xa0, 0x31,
0x47, 0x70, 0xd9, 0xa6, 0x96, 0x60, 0x33, 0xe6, 0x93, 0x39, 0x4f, 0x34, 0x10, 0x79, 0x17, 0x89,
0xe3, 0x91, 0x8c, 0x74, 0xd3, 0x87, 0xe8, 0xa7, 0xfb, 0xa2, 0x6d, 0x2b, 0xd4, 0xc4, 0x55, 0x8a,
0xd5, 0xc7, 0x41, 0x8c, 0xfe, 0xd0, 0x78, 0xb2, 0x75, 0x64, 0xcd, 0x37, 0x75, 0xac, 0x8c, 0x6c,
0x2b, 0x01, 0xfb, 0x4c, 0xa1, 0xfb, 0x2c, 0x2d, 0x91, 0x1b, 0x89, 0xbd, 0x7c, 0x4e, 0xee, 0x54,
0x37, 0x55, 0xbd, 0x44, 0xf1, 0xa3, 0xd8, 0x9b, 0x0c, 0x8e, 0x64, 0x8b, 0xda, 0x29, 0x9f, 0x4d,
0xbf, 0x0b, 0xce, 0x12, 0x6b, 0xda, 0x98, 0x73, 0xcc, 0xa6, 0xf0, 0x01, 0x4b, 0xb8, 0x61, 0x69,
0x9a, 0xe0, 0x4b, 0x34, 0xf0, 0xb6, 0x41, 0x38, 0xd3, 0x22, 0x85, 0x8f, 0xab, 0x87, 0x5e, 0x39,
0xdb, 0x9a, 0x9b, 0xab, 0xde, 0x42, 0x93, 0x86, 0x49, 0x04, 0x44, 0xfd, 0x1c, 0x02, 0xc4, 0x66,
0x0a, 0x53, 0x2f, 0x8f, 0x21, 0x77, 0x97, 0x46, 0xc1, 0xf8, 0x20, 0x9a, 0xaa, 0x50, 0xeb, 0xb2,
0xfe, 0xa9, 0x51, 0xb2, 0x21, 0x6c, 0xf4, 0x60, 0x18, 0x98, 0x64, 0xc6, 0x46, 0x28, 0x9e, 0x3c,
0x11, 0x2d, 0x55, 0xac, 0x65, 0x6c, 0xfb, 0xaf, 0x53, 0xdd, 0xf4, 0x20, 0x7c, 0x04, 0xea, 0x11,
0xe8, 0x25, 0x65, 0x15, 0x32, 0x8e, 0x08, 0x84, 0xc7, 0x41, 0x13, 0x58, 0x73, 0x61, 0x63, 0x1f,
0xef, 0x30, 0x1d, 0x3d, 0x36, 0x08, 0x3a, 0xb2, 0xf7, 0x25, 0x56, 0xfc, 0x03, 0x78, 0xb5, 0x45,
0xf4, 0x60, 0x47, 0x95, 0x1c, 0x6a, 0x2b, 0xfa, 0xe3, 0x1c, 0x2f, 0x0d, 0x2a, 0x90, 0x43, 0x65,
0x09, 0xbe, 0x63, 0x71, 0x33, 0xd3, 0x29, 0x31, 0xd5, 0x29, 0x26, 0xa3, 0x15, 0xc2, 0x46, 0x70,
0xb4, 0x3e, 0x23, 0xaf, 0xb6, 0xfb, 0x87, 0x8f, 0x39, 0xf0, 0xd2, 0x3e, 0x35, 0x3c, 0xec, 0x7a,
0xd2, 0x0f, 0xd8, 0xa4, 0x0c, 0x19, 0xc8, 0xee, 0x47, 0x7c, 0x1e, 0xd6, 0x67, 0x31, 0xe2, 0x9d,
0xc0, 0x65, 0x64, 0x60, 0xe7, 0xd2, 0xeb, 0xe1, 0x02, 0xd5, 0x92, 0x7c, 0x51, 0xf1, 0x3b, 0x12,
0x00, 0x65, 0xfd, 0x2b, 0x13, 0x15, 0xfa, 0x6d, 0x99, 0x1d, 0xd3, 0x03, 0x77, 0xb1, 0xb0, 0xf0,
0x39, 0x7c, 0x27, 0x13, 0x30, 0xba, 0xff, 0x4d, 0x2e, 0xda, 0xe0, 0x37, 0xad, 0xf4, 0x49, 0x0a,
0xdd, 0x1e, 0x87, 0x8c, 0xc9, 0x6b, 0xf8, 0xc6, 0xb3, 0x05, 0xeb, 0x6c, 0x5f, 0x84, 0x64, 0x62,
0x1c, 0xf6, 0x04, 0x6f, 0xd7, 0xa9, 0xbc, 0x22, 0x97, 0xdb, 0x8d, 0xa5, 0xe1, 0x3a, 0x5c, 0x0d,
0x7b, 0x78, 0x25, 0x98, 0x04, 0x7f, 0x2b, 0x59, 0x5b, 0x7c, 0xf7, 0x73, 0x37, 0x7f, 0x92, 0x8d
};
#else
const NvU8 g_rsa3k_modulus_mutual_authentication_prod[] =
{
0xa9, 0xea, 0xc3, 0x35, 0xce, 0xfa, 0x3a, 0xdd, 0x57, 0xb2, 0x77, 0xfc, 0xa9, 0xa8, 0x60, 0x3a,
0x4a, 0x73, 0x2e, 0x73, 0xaf, 0x2f, 0xfa, 0xcd, 0x3a, 0x62, 0xaf, 0x54, 0xe6, 0x6c, 0x31, 0x9b,
0x4e, 0xe2, 0xff, 0x47, 0xa2, 0x3e, 0xe0, 0xbc, 0x37, 0x17, 0xd2, 0x30, 0x14, 0x73, 0x25, 0x8e,
0x24, 0x76, 0xc0, 0xda, 0x08, 0xd1, 0x7e, 0xa6, 0xd4, 0x51, 0x32, 0x9e, 0x34, 0x47, 0xd9, 0x79,
0x2c, 0x88, 0x8c, 0xc9, 0xf3, 0x7d, 0x89, 0x73, 0x01, 0x2d, 0x4e, 0x2e, 0x07, 0x56, 0x97, 0xf9,
0x02, 0xc1, 0xb4, 0x77, 0x89, 0x0e, 0xc2, 0xbd, 0x1f, 0xbb, 0x5a, 0x05, 0xb3, 0x56, 0xd3, 0x03,
0x11, 0x9b, 0x51, 0xaf, 0x59, 0x6e, 0xee, 0x70, 0xd7, 0xc7, 0x7a, 0x78, 0x4c, 0x19, 0x95, 0xf0,
0xc2, 0xb7, 0xfb, 0xac, 0x8a, 0xa5, 0x0f, 0x79, 0x4a, 0x06, 0xcd, 0x82, 0x3d, 0x46, 0xb7, 0x02,
0xc8, 0x91, 0x48, 0x57, 0xfb, 0x87, 0x0f, 0xa7, 0x67, 0x11, 0x1e, 0x03, 0xa4, 0x53, 0x2b, 0xdd,
0x05, 0x5d, 0xf2, 0xb7, 0xb2, 0xbf, 0xfd, 0x15, 0x72, 0x89, 0xbc, 0xc0, 0x05, 0xea, 0xce, 0xdb,
0x8c, 0xc1, 0xd8, 0x4c, 0xdb, 0x6f, 0x40, 0xe0, 0xde, 0x39, 0x39, 0xae, 0x5e, 0xa1, 0x45, 0x2c,
0x57, 0x86, 0x0b, 0x57, 0x8a, 0x97, 0x66, 0xd7, 0x53, 0xf1, 0x78, 0xda, 0x74, 0x3c, 0x16, 0xd7,
0x5c, 0xfb, 0x1a, 0xda, 0xc8, 0x2c, 0x16, 0xb9, 0x3e, 0x3a, 0x1c, 0x56, 0x3c, 0x6d, 0xe6, 0x9c,
0x87, 0xb1, 0x54, 0x4d, 0x89, 0x17, 0x5e, 0x99, 0x88, 0x79, 0x60, 0x92, 0xe2, 0x12, 0x5b, 0x65,
0x6b, 0x0f, 0x6f, 0x8c, 0x1c, 0xce, 0x63, 0x90, 0xbb, 0x68, 0xd0, 0x76, 0x83, 0xa4, 0xe1, 0xd6,
0xea, 0x01, 0x0a, 0x38, 0x6c, 0xf5, 0x9e, 0x9a, 0x81, 0x16, 0x2e, 0xba, 0x27, 0x9e, 0xb9, 0x2b,
0x8a, 0x05, 0xf6, 0xd5, 0x2b, 0x7e, 0x19, 0xa3, 0x6c, 0x90, 0x4b, 0xd8, 0xb8, 0xce, 0xd8, 0x60,
0xb8, 0x2f, 0xe7, 0x72, 0x26, 0x41, 0x73, 0x31, 0x2c, 0xa4, 0x9b, 0x7d, 0xc3, 0x24, 0x65, 0x91,
0xa2, 0xae, 0xd5, 0x77, 0xc6, 0xa0, 0x30, 0x15, 0xa6, 0x9f, 0x09, 0x5c, 0xb6, 0x59, 0xeb, 0x82,
0x98, 0xae, 0xb9, 0x9c, 0x93, 0x82, 0xb3, 0xad, 0x12, 0x9c, 0xf3, 0xdb, 0xd0, 0x2c, 0x86, 0x26,
0x67, 0xe6, 0x42, 0xe3, 0xc1, 0xea, 0xdb, 0x99, 0xbc, 0x8d, 0x4c, 0x98, 0x21, 0xa1, 0x98, 0x09,
0x34, 0x42, 0x5e, 0xfd, 0xf4, 0x7f, 0xdf, 0x5f, 0xfa, 0x2b, 0x68, 0xdb, 0xb8, 0x5a, 0x4d, 0x14,
0x43, 0x8b, 0x66, 0xe2, 0x9b, 0x5b, 0x83, 0xea, 0x92, 0xf9, 0x98, 0xa4, 0x6d, 0x42, 0xeb, 0x71,
0x60, 0x98, 0x84, 0x55, 0x64, 0x23, 0x18, 0x9f, 0x76, 0x76, 0xd1, 0x74, 0xca, 0xd2, 0x65, 0xcd,
0x00
};
const NvU8 g_rsa3k_public_exponent_mutual_authentication_prod[] = {0x01, 0x00, 0x01, 0x00};
const NvU8 g_rsa3k_private_exponent_mutual_authentication_prod[] =
{
0x8d, 0x92, 0x7f, 0x37, 0x73, 0xf7, 0x7c, 0x5b, 0x59, 0x2b, 0x7f, 0x04, 0x98, 0x25, 0x78, 0x7b,
0x0d, 0x5c, 0x3a, 0xe1, 0xa5, 0x8d, 0xdb, 0x97, 0x22, 0xbc, 0xa9, 0xd7, 0x6f, 0x04, 0xf6, 0x1c,
0x62, 0x64, 0x84, 0x5f, 0x6c, 0xeb, 0x05, 0xb3, 0xc6, 0xf8, 0x6b, 0xc9, 0x8c, 0x87, 0x1e, 0xdd,
0x0a, 0x49, 0xf4, 0xad, 0x37, 0xe0, 0xda, 0x2e, 0x4d, 0xff, 0xba, 0x30, 0x13, 0x27, 0x7c, 0x39,
0xf0, 0xb0, 0xb1, 0x77, 0x03, 0xd3, 0x1d, 0x99, 0x6d, 0xfa, 0x15, 0x13, 0x2b, 0xfd, 0x65, 0x00,
0x12, 0x3b, 0xf1, 0x51, 0x7c, 0x92, 0xd5, 0x02, 0xe1, 0xeb, 0xd2, 0xe7, 0x60, 0x64, 0x65, 0xc0,
0x9d, 0xe2, 0x31, 0x67, 0xd6, 0x1e, 0x7c, 0x47, 0xee, 0xc8, 0x19, 0x0c, 0xa4, 0xd8, 0x0f, 0xd2,
0x7a, 0xec, 0x3c, 0x35, 0x3e, 0xd2, 0xf0, 0x39, 0x8f, 0x87, 0xfb, 0xb6, 0xaf, 0x23, 0x3e, 0xb4,
0x70, 0x46, 0xc2, 0x15, 0xa3, 0x26, 0x29, 0xd5, 0x31, 0x29, 0xd3, 0x33, 0x71, 0x63, 0xbe, 0x09,
0x65, 0x43, 0x90, 0x2a, 0x0d, 0x2f, 0x1c, 0xe3, 0xfa, 0x2b, 0x6a, 0x1c, 0x95, 0x47, 0x60, 0xf4,
0x45, 0xb5, 0x78, 0x03, 0xfc, 0x56, 0x25, 0xf7, 0xb2, 0x3a, 0x08, 0x36, 0x3d, 0x1d, 0x30, 0xef,
0x1f, 0x63, 0x61, 0x73, 0x58, 0x13, 0x41, 0xc7, 0x84, 0x08, 0x8e, 0x32, 0x15, 0x65, 0x25, 0xe8,
0x11, 0xea, 0x04, 0x7c, 0x20, 0xf4, 0xdd, 0x53, 0xaf, 0xfb, 0x6c, 0x65, 0xac, 0x55, 0x2d, 0x11,
0x3c, 0x9e, 0x28, 0x46, 0xc6, 0x64, 0x98, 0x18, 0x60, 0xf4, 0x6c, 0x21, 0xb2, 0x51, 0xa9, 0xfe,
0xb2, 0xeb, 0x50, 0xaa, 0x9a, 0x20, 0xf8, 0xc1, 0x46, 0x97, 0x77, 0x21, 0x8f, 0x2f, 0x53, 0x0a,
0x66, 0xc4, 0x02, 0x1c, 0xfd, 0x44, 0x04, 0x49, 0x86, 0x93, 0x42, 0xde, 0xab, 0x9b, 0x9a, 0xdb,
0x39, 0x5e, 0x87, 0xab, 0x8f, 0x85, 0x22, 0xd3, 0x38, 0x41, 0xb6, 0xf0, 0x34, 0x4b, 0xe0, 0x9a,
0x69, 0x61, 0xb8, 0x4b, 0x01, 0xf0, 0xa6, 0xcc, 0x73, 0x98, 0xda, 0x6b, 0x12, 0xce, 0x0b, 0xbf,
0x4d, 0x9f, 0x29, 0xda, 0x8b, 0x64, 0x8e, 0x0c, 0x9b, 0xd8, 0xa3, 0xf1, 0x44, 0xbd, 0x55, 0x37,
0x54, 0xee, 0x4e, 0x7c, 0xbd, 0x89, 0x1b, 0x91, 0x2d, 0x2c, 0xfb, 0xa1, 0x4c, 0xfb, 0x01, 0x2b,
0x6c, 0x8c, 0xac, 0x75, 0x37, 0xcd, 0x64, 0x75, 0xb2, 0x78, 0xd0, 0xfe, 0x8c, 0x41, 0xc7, 0xd5,
0x8a, 0x55, 0xc4, 0xd4, 0x2b, 0x6d, 0xa2, 0xfb, 0xa7, 0xe8, 0x87, 0xd3, 0x74, 0x8c, 0x91, 0xe3,
0x89, 0x17, 0x79, 0x10, 0x34, 0x4f, 0x39, 0x93, 0xe6, 0x33, 0x60, 0x96, 0xa6, 0xd9, 0x70, 0x47,
0x31, 0xa0, 0x35, 0x6a, 0x58, 0x14, 0xb7, 0xb0, 0xeb, 0x0d, 0xc3, 0x6a, 0x6d, 0xc0, 0x85, 0x04
};
#endif
#endif // _RM_SPDM_RSA_KEYS_H_

View File

@ -839,6 +839,45 @@
#define __NV_ENABLE_NONBLOCKING_OPEN EnableNonblockingOpen
#define NV_ENABLE_NONBLOCKING_OPEN NV_REG_STRING(__NV_ENABLE_NONBLOCKING_OPEN)
/*
* Option: NVreg_ImexChannelCount
*
* Description:
*
* This option allows users to specify the number of IMEX (import/export)
* channels. Within an IMEX domain, the channels allow sharing memory
* securely in a multi-user environment using the CUDA driver's fabric handle
* based APIs.
*
* An IMEX domain is either an OS instance or a group of securely
* connected OS instances using the NVIDIA IMEX daemon. The option must
* be set to the same value on each OS instance within the IMEX domain.
*
* An IMEX channel is a logical entity that is represented by a /dev node.
* The IMEX channels are global resources within the IMEX domain. When
* exporter and importer CUDA processes have been granted access to the
* same IMEX channel, they can securely share memory.
*
* Note that the NVIDIA driver will not attempt to create the /dev nodes. Thus,
* the related CUDA APIs will fail with an insufficient permission error until
* the /dev nodes are set up. The creation of these /dev nodes,
* /dev/nvidia-caps-imex-channels/channelN, must be handled by the
* administrator, where N is the minor number. The major number can be
* queried from /proc/devices.
*
* nvidia-modprobe CLI support is available to set up the /dev nodes.
* NVreg_ModifyDeviceFiles, NVreg_DeviceFileGID, NVreg_DeviceFileUID
* and NVreg_DeviceFileMode will be honored by nvidia-modprobe.
*
* Possible values:
* 0 - Disable IMEX using CUDA driver's fabric handles.
* N - N IMEX channels will be enabled in the driver to facilitate N
* concurrent users. The default is 2048 channels, and the current
* maximum is 20 bits wide, the same as the Linux dev_t minor number limit.
*/
#define __NV_IMEX_CHANNEL_COUNT ImexChannelCount
#define NV_REG_IMEX_CHANNEL_COUNT NV_REG_STRING(__NV_IMEX_CHANNEL_COUNT)
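For illustration only, the option is typically set through a modprobe configuration entry; the exact file under /etc/modprobe.d/ and the value shown are examples, not requirements of this header:

    options nvidia NVreg_ImexChannelCount=2048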
#if defined(NV_DEFINE_REGISTRY_KEY_TABLE)
/*
@ -887,6 +926,7 @@ NV_DEFINE_REG_STRING_ENTRY(__NV_TEMPORARY_FILE_PATH, NULL);
NV_DEFINE_REG_STRING_ENTRY(__NV_EXCLUDED_GPUS, NULL);
NV_DEFINE_REG_ENTRY(__NV_DMA_REMAP_PEER_MMIO, NV_DMA_REMAP_PEER_MMIO_ENABLE);
NV_DEFINE_REG_STRING_ENTRY(__NV_RM_NVLINK_BW, NULL);
NV_DEFINE_REG_ENTRY_GLOBAL(__NV_IMEX_CHANNEL_COUNT, 2048);
/*
*----------------registry database definition----------------------
@ -933,6 +973,7 @@ nv_parm_t nv_parms[] = {
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_ENABLE_DBG_BREAKPOINT),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_OPENRM_ENABLE_UNSUPPORTED_GPUS),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_DMA_REMAP_PEER_MMIO),
NV_DEFINE_PARAMS_TABLE_ENTRY(__NV_IMEX_CHANNEL_COUNT),
{NULL, NULL}
};

View File

@ -621,6 +621,14 @@ typedef enum
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
/*
* For console setup by EFI GOP, the base address is BAR1.
* For console setup by VBIOS, the base address is BAR2 + 16MB.
*/
#define NV_IS_CONSOLE_MAPPED(nv, addr) \
(((addr) == (nv)->bars[NV_GPU_BAR_INDEX_FB].cpu_address) || \
((addr) == ((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000)))
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
((nv)->iommus.iso_iommu_present)
@ -878,6 +886,8 @@ NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
NvU32 NV_API_CALL nv_get_os_type(void);
void NV_API_CALL nv_get_updated_emu_seg(NvU32 *start, NvU32 *end);
void NV_API_CALL nv_get_screen_info(nv_state_t *, NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64 *);
struct dma_buf;
typedef struct nv_dma_buf nv_dma_buf_t;
struct drm_gem_object;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -160,10 +160,9 @@ NvBool NV_API_CALL os_is_vgx_hyper (void);
NV_STATUS NV_API_CALL os_inject_vgx_msi (NvU16, NvU64, NvU32);
NvBool NV_API_CALL os_is_grid_supported (void);
NvU32 NV_API_CALL os_get_grid_csp_support (void);
void NV_API_CALL os_get_screen_info (NvU64 *, NvU32 *, NvU32 *, NvU32 *, NvU32 *, NvU64, NvU64);
void NV_API_CALL os_bug_check (NvU32, const char *);
NV_STATUS NV_API_CALL os_lock_user_pages (void *, NvU64, void **, NvU32);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **, void**);
NV_STATUS NV_API_CALL os_lookup_user_io_memory (void *, NvU64, NvU64 **);
NV_STATUS NV_API_CALL os_unlock_user_pages (NvU64, void *);
NV_STATUS NV_API_CALL os_match_mmap_offset (void *, NvU64, NvU64 *);
NV_STATUS NV_API_CALL os_get_euid (NvU32 *);
@ -198,6 +197,8 @@ nv_cap_t* NV_API_CALL os_nv_cap_create_file_entry (nv_cap_t *, const char *,
void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int);
NvS32 NV_API_CALL os_imex_channel_get (NvU64);
NvS32 NV_API_CALL os_imex_channel_count (void);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
@ -219,6 +220,7 @@ extern NvU8 os_page_shift;
extern NvBool os_cc_enabled;
extern NvBool os_cc_tdx_enabled;
extern NvBool os_dma_buf_enabled;
extern NvBool os_imex_channel_is_supported;
/*
* ---------------------------------------------------------------------------

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -167,25 +167,11 @@ static void RmCreateOsDescriptor(NVOS32_PARAMETERS *pApi, API_SECURITY_INFO secI
}
else if (rmStatus == NV_ERR_INVALID_ADDRESS)
{
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount,
&pPteArray, &pPageArray);
rmStatus = os_lookup_user_io_memory(pDescriptor, pageCount, &pPteArray);
if (rmStatus == NV_OK)
{
if (pPageArray != NULL)
{
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPageArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_PAGE_ARRAY;
}
else if (pPteArray != NULL)
{
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
}
else
{
NV_ASSERT_FAILED("unknown memory import type");
rmStatus = NV_ERR_NOT_SUPPORTED;
}
pApi->data.AllocOsDesc.descriptor = (NvP64)(NvUPtr)pPteArray;
pApi->data.AllocOsDesc.descriptorType = NVOS32_DESCRIPTOR_TYPE_OS_IO_MEMORY;
}
}
if (rmStatus != NV_OK)

View File

@ -1165,6 +1165,11 @@ NV_STATUS osIsVfioPciCorePresent(void)
return os_call_vgpu_vfio((void *) &vgpu_info, CMD_VFIO_PCI_CORE_PRESENT);
}
NvU32 osGetGridCspSupport(void)
{
return os_get_grid_csp_support();
}
void initVGXSpecificRegistry(OBJGPU *pGpu)
{
NvU32 data32;

View File

@ -4828,6 +4828,39 @@ osRmCapInitDescriptor
*pCapDescriptor = NV_U64_MAX;
}
/*
* @brief Checks if IMEX channel support is present.
*/
NvBool
osImexChannelIsSupported(void)
{
return os_imex_channel_is_supported;
}
/*
* @brief Returns IMEX channel count.
*/
NvS32
osImexChannelCount
(
void
)
{
return os_imex_channel_count();
}
/*
* @brief Returns IMEX channel number.
*
* @param[in] descriptor OS specific descriptor to query channel number.
*
*/
NvS32
osImexChannelGet(NvU64 descriptor)
{
return os_imex_channel_get(descriptor);
}
/*
* @brief Generates random bytes which can be used as a universally unique
* identifier.
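
A minimal caller sketch for the new IMEX channel wrappers above. The call sequence and the `descriptor` and `pRmClient` locals are assumptions for illustration, not code from this commit; the commit does, however, add an `imexChannel` field to RmClient in a later hunk.

```c
/* Illustrative only: gate IMEX channel use on OS-level support before
 * querying the channel count or resolving a channel from a descriptor. */
if (osImexChannelIsSupported() && (osImexChannelCount() > 0))
{
    NvS32 channel = osImexChannelGet(descriptor); /* descriptor: assumed OS-specific handle */

    if (channel >= 0)
    {
        pRmClient->imexChannel = channel; /* NvS32 field introduced on RmClient in this change */
    }
}
```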

View File

@ -2153,6 +2153,7 @@ static NV_STATUS RmCreateMmapContextLocked(
RsClient *pClient = staticCast(pRmClient, RsClient);
KernelMemorySystem *pKernelMemorySystem = NULL;
NvBool bCoherentAtsCpuOffset = NV_FALSE;
NvBool bSriovHostCoherentFbOffset = NV_FALSE;
nv_state_t *pNv = NULL;
NvU64 addr = (NvU64)address;
NvU32 prot = 0;
@ -2200,6 +2201,8 @@ static NV_STATUS RmCreateMmapContextLocked(
pNv = NV_GET_NV_STATE(pGpu);
pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
bCoherentAtsCpuOffset = IS_COHERENT_CPU_ATS_OFFSET(pKernelMemorySystem, addr, size);
bSriovHostCoherentFbOffset = os_is_vgx_hyper() &&
IS_COHERENT_FB_OFFSET(pKernelMemorySystem, addr, size);
}
//
@ -2210,7 +2213,7 @@ static NV_STATUS RmCreateMmapContextLocked(
if ((pNv == NULL) ||
(!IS_REG_OFFSET(pNv, addr, size) &&
!IS_FB_OFFSET(pNv, addr, size) &&
!bCoherentAtsCpuOffset &&
!(bCoherentAtsCpuOffset || bSriovHostCoherentFbOffset) &&
!IS_IMEM_OFFSET(pNv, addr, size)))
{
pNv = nv_get_ctl_state();
@ -2240,6 +2243,38 @@ static NV_STATUS RmCreateMmapContextLocked(
goto done;
}
}
else if (bSriovHostCoherentFbOffset)
{
status = RmGetMmapPteArray(pKernelMemorySystem, pClient, hMemory, nvuap);
if (status != NV_OK)
{
goto done;
}
//
// nvuap->page_array(allocated in RmGetMmapPteArray) is not assigned
// to nvamc->page_array if onlining status is false(which is the case with
// bSriovHostCoherentFbOffset) and so doesn't get freed if not done here.
// The call to RmGetMmapPteArray is for getting the contig and num
// pages of the allocation.
//
os_free_mem(nvuap->page_array);
nvuap->page_array = NULL;
//
// This path is taken in the case of self-hosted SRIOV host where
// the coherent GPU memory is not onlined but the CPU mapping to
// the coherent GPU memory is done via C2C(instead of BAR1) and so
// only contig can be supported for now.
//
if (!nvuap->contig && (nvuap->num_pages > 1))
{
NV_PRINTF(LEVEL_ERROR, "Mapping of Non-contig allocation for "
"not onlined coherent GPU memory not supported\n");
status = NV_ERR_NOT_SUPPORTED;
goto done;
}
}
if (RmSetUserMapAccessRange(nvuap) != NV_OK)
{
@ -5377,16 +5412,11 @@ NvBool rm_get_uefi_console_status(
NvU64 fbBaseAddress = 0;
NvBool bConsoleDevice = NV_FALSE;
// os_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
// UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
// driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
// it will point to BAR2 + 16MB.
os_get_screen_info(&fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
//
// nv_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc).
//
nv_get_screen_info(nv, &fbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);
bConsoleDevice = (fbSize != 0);
@ -5403,16 +5433,11 @@ NvU64 rm_get_uefi_console_size(
fbSize = fbWidth = fbHeight = fbDepth = fbPitch = 0;
// os_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc). To find if this is a
// UEFI console check the fbBaseAddress: if it was set up by the EFI GOP
// driver, it will point into BAR1 (FB); if it was set up by the VBIOS,
// it will point to BAR2 + 16MB.
os_get_screen_info(pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
fbSize = (NvU64)fbHeight * (NvU64)fbPitch;
//
// nv_get_screen_info() will return dimensions and an address for
// any fbdev driver (e.g., efifb, vesafb, etc).
//
nv_get_screen_info(nv, pFbBaseAddress, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);
return fbSize;
}
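
Both call sites above now follow a single pattern. A minimal sketch, assuming `nv` is a valid nv_state_t pointer: the OS layer reports the framebuffer size directly, so RM no longer derives it from height * pitch or compares the base address against BAR1 or BAR2 + 16 MB.

```c
/* Sketch of the new query pattern: the size comes back from the OS layer. */
NvU64 fbBase = 0, fbSize = 0;
NvU32 fbWidth = 0, fbHeight = 0, fbDepth = 0, fbPitch = 0;

nv_get_screen_info(nv, &fbBase, &fbWidth, &fbHeight, &fbDepth, &fbPitch, &fbSize);

/* A non-zero size is what rm_get_uefi_console_status() treats as "console present". */
NvBool bConsole = (fbSize != 0);
```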

View File

@ -582,13 +582,14 @@ NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *sp,
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
gpuDeviceHandle device)
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *sp,
gpuFaultInfo *pFaultInfo,
NvBool bCopyAndFlush)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsFlushReplayableFaultBuffer(device);
rmStatus = nvGpuOpsFlushReplayableFaultBuffer(pFaultInfo, bCopyAndFlush);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
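
For reference, a hypothetical call site under the new signature shown above; `sp`, `pFaultInfo`, and the choice of flag are assumptions, not code from this commit.

```c
/* The entry point now takes the fault-info structure plus a copy-and-flush
 * flag instead of a gpuDeviceHandle. */
status = rm_gpu_ops_flush_replayable_fault_buffer(sp, pFaultInfo, NV_TRUE /* bCopyAndFlush */);
```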

View File

@ -348,7 +348,7 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL
if ((memmgrGetReservedConsoleMemDesc(pGpu, pMemoryManager) != NULL) && bContinue)
{
NvU64 baseAddr;
NvU64 baseAddr, size;
NvU32 width, height, depth, pitch;
// There should only be one.
@ -357,9 +357,8 @@ NV_STATUS deviceCtrlCmdOsUnixVTGetFBInfo_IMPL
pParams->subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
// Console is either mapped to BAR1 or BAR2 + 16 MB
os_get_screen_info(&baseAddr, &width, &height, &depth, &pitch,
nv->bars[NV_GPU_BAR_INDEX_FB].cpu_address,
nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + 0x1000000);
nv_get_screen_info(nv, &baseAddr, &width, &height, &depth,
&pitch, &size);
pParams->width = (NvU16)width;
pParams->height = (NvU16)height;

View File

@ -622,7 +622,7 @@ NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(
// Copies all valid packets in RM's and client's shadow buffer
status = kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, &threadState, faultsCopied,
NON_REPLAYABLE_FAULT_BUFFER);
NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);
threadStateFreeISRAndDeferredIntHandler(&threadState, pGpu, THREAD_STATE_FLAGS_IS_ISR);
tlsIsrDestroy(pIsrAllocator);
@ -659,7 +659,7 @@ static NV_STATUS _rm_gpu_copy_mmu_faults_unlocked(
// Copies all valid packets in RM's and client's shadow buffer
return kgmmuCopyMmuFaults_HAL(pGpu, pKernelGmmu, pThreadState, pFaultsCopied,
NON_REPLAYABLE_FAULT_BUFFER);
NON_REPLAYABLE_FAULT_BUFFER, NV_FALSE);
return NV_OK;
}

View File

@ -156,6 +156,7 @@ struct RmClient {
PSECURITY_TOKEN pSecurityToken;
struct UserInfo *pUserInfo;
NvBool bIsClientVirtualMode;
NvS32 imexChannel;
PNODE pCliSyncGpuBoostTree;
};

View File

@ -1136,28 +1136,6 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
pThis->__gpuUpdateErrorContainmentState__ = &gpuUpdateErrorContainmentState_c04480;
}
// Hal function -- gpuCheckEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
{
pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_TU102;
}
// default
else
{
pThis->__gpuCheckEccCounts__ = &gpuCheckEccCounts_d44104;
}
// Hal function -- gpuClearEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000420UL) )) /* ChipHal: TU102 | GA100 | GH100 */
{
pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_TU102;
}
// default
else
{
pThis->__gpuClearEccCounts__ = &gpuClearEccCounts_ac1694;
}
// Hal function -- gpuWaitForGfwBootComplete
if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000001UL) )) /* RmVariantHal: VF */
{

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2004-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2004-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -973,8 +973,6 @@ struct OBJGPU {
NvBool (*__gpuIsDevModeEnabledInHw__)(struct OBJGPU *);
NvBool (*__gpuIsCtxBufAllocInPmaSupported__)(struct OBJGPU *);
NV_STATUS (*__gpuUpdateErrorContainmentState__)(struct OBJGPU *, NV_ERROR_CONT_ERR_ID, NV_ERROR_CONT_LOCATION, NvU32 *);
void (*__gpuCheckEccCounts__)(struct OBJGPU *);
NV_STATUS (*__gpuClearEccCounts__)(struct OBJGPU *);
NV_STATUS (*__gpuWaitForGfwBootComplete__)(struct OBJGPU *);
NvBool (*__gpuGetIsCmpSku__)(struct OBJGPU *);
NvBool PDB_PROP_GPU_HIGH_SPEED_BRIDGE_CONNECTED;
@ -1236,6 +1234,7 @@ struct OBJGPU {
NvBool bStateUnloading;
NvBool bStateLoaded;
NvBool bFullyConstructed;
NvBool bRecoveryMarginPresent;
NvBool bBf3WarBug4040336Enabled;
NvBool bUnifiedMemorySpaceEnabled;
NvBool bSriovEnabled;
@ -1633,10 +1632,6 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
#define gpuIsCtxBufAllocInPmaSupported_HAL(pGpu) gpuIsCtxBufAllocInPmaSupported_DISPATCH(pGpu)
#define gpuUpdateErrorContainmentState(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuUpdateErrorContainmentState_HAL(pGpu, arg0, arg1, arg2) gpuUpdateErrorContainmentState_DISPATCH(pGpu, arg0, arg1, arg2)
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuClearEccCounts_HAL(pGpu) gpuClearEccCounts_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuWaitForGfwBootComplete_HAL(pGpu) gpuWaitForGfwBootComplete_DISPATCH(pGpu)
#define gpuGetIsCmpSku(pGpu) gpuGetIsCmpSku_DISPATCH(pGpu)
@ -2557,6 +2552,34 @@ static inline NV_STATUS gpuSetPartitionErrorAttribution(struct OBJGPU *pGpu, NV_
#define gpuSetPartitionErrorAttribution_HAL(pGpu, arg0, arg1, arg2) gpuSetPartitionErrorAttribution(pGpu, arg0, arg1, arg2)
NV_STATUS gpuCreateRusdMemory_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuCreateRusdMemory(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuCreateRusdMemory(pGpu) gpuCreateRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
#define gpuCreateRusdMemory_HAL(pGpu) gpuCreateRusdMemory(pGpu)
NvBool gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline NvBool gpuCheckEccCounts(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_FALSE;
}
#else //__nvoc_gpu_h_disabled
#define gpuCheckEccCounts(pGpu) gpuCheckEccCounts_TU102(pGpu)
#endif //__nvoc_gpu_h_disabled
#define gpuCheckEccCounts_HAL(pGpu) gpuCheckEccCounts(pGpu)
NV_STATUS gpuConstructDeviceInfoTable_FWCLIENT(struct OBJGPU *pGpu);
NV_STATUS gpuConstructDeviceInfoTable_VGPUSTUB(struct OBJGPU *pGpu);
@ -3147,26 +3170,6 @@ static inline NV_STATUS gpuUpdateErrorContainmentState_DISPATCH(struct OBJGPU *p
return pGpu->__gpuUpdateErrorContainmentState__(pGpu, arg0, arg1, arg2);
}
static inline void gpuCheckEccCounts_d44104(struct OBJGPU *pGpu) {
return;
}
void gpuCheckEccCounts_TU102(struct OBJGPU *pGpu);
static inline void gpuCheckEccCounts_DISPATCH(struct OBJGPU *pGpu) {
pGpu->__gpuCheckEccCounts__(pGpu);
}
static inline NV_STATUS gpuClearEccCounts_ac1694(struct OBJGPU *pGpu) {
return NV_OK;
}
NV_STATUS gpuClearEccCounts_TU102(struct OBJGPU *pGpu);
static inline NV_STATUS gpuClearEccCounts_DISPATCH(struct OBJGPU *pGpu) {
return pGpu->__gpuClearEccCounts__(pGpu);
}
NV_STATUS gpuWaitForGfwBootComplete_TU102(struct OBJGPU *pGpu);
static inline NV_STATUS gpuWaitForGfwBootComplete_5baef9(struct OBJGPU *pGpu) {
@ -4458,16 +4461,6 @@ static inline NV_STATUS gpuSanityCheckRegisterAccess(struct OBJGPU *pGpu, NvU32
#define gpuSanityCheckRegisterAccess(pGpu, addr, pRetVal) gpuSanityCheckRegisterAccess_IMPL(pGpu, addr, pRetVal)
#endif //__nvoc_gpu_h_disabled
void gpuUpdateUserSharedData_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline void gpuUpdateUserSharedData(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuUpdateUserSharedData(pGpu) gpuUpdateUserSharedData_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuValidateRegOffset_IMPL(struct OBJGPU *pGpu, NvU32 arg0);
#ifdef __nvoc_gpu_h_disabled
@ -4523,6 +4516,38 @@ static inline NV_STATUS gpuGc6Exit(struct OBJGPU *pGpu, NV2080_CTRL_GC6_EXIT_PAR
#define gpuGc6Exit(pGpu, arg0) gpuGc6Exit_IMPL(pGpu, arg0)
#endif //__nvoc_gpu_h_disabled
void gpuDestroyRusdMemory_IMPL(struct OBJGPU *pGpu);
#ifdef __nvoc_gpu_h_disabled
static inline void gpuDestroyRusdMemory(struct OBJGPU *pGpu) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
#else //__nvoc_gpu_h_disabled
#define gpuDestroyRusdMemory(pGpu) gpuDestroyRusdMemory_IMPL(pGpu)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuEnableAccounting_IMPL(struct OBJGPU *arg0);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuEnableAccounting(struct OBJGPU *arg0) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuEnableAccounting(arg0) gpuEnableAccounting_IMPL(arg0)
#endif //__nvoc_gpu_h_disabled
NV_STATUS gpuDisableAccounting_IMPL(struct OBJGPU *arg0, NvBool bForce);
#ifdef __nvoc_gpu_h_disabled
static inline NV_STATUS gpuDisableAccounting(struct OBJGPU *arg0, NvBool bForce) {
NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_gpu_h_disabled
#define gpuDisableAccounting(arg0, bForce) gpuDisableAccounting_IMPL(arg0, bForce)
#endif //__nvoc_gpu_h_disabled
#undef PRIVATE_FIELD
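
gpuCheckEccCounts() is no longer a per-chip NVOC dispatch: the header above maps it straight to gpuCheckEccCounts_TU102() and changes the return type from void to NvBool. A hedged sketch of the resulting call pattern follows; the meaning of the return value is not spelled out in this diff, so the comment does not interpret it.

```c
/* Direct call via the new macro; no __gpuCheckEccCounts__ function pointer
 * is consulted anymore. The NvBool result is simply captured here. */
NvBool eccCheckResult = gpuCheckEccCounts(pGpu);
(void)eccCheckResult; /* how callers use the result is not shown in this diff */
```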

View File

@ -2159,6 +2159,16 @@ NV_STATUS rpcMapMemoryDma_STUB(
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:CTRL_SET_VGPU_FB_USAGE - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcCtrlSetVgpuFbUsage_STUB(
POBJGPU pGpu,
POBJRPC pRpc,
void *pArg3
)
{
return NV_VGPU_MSG_RESULT_RPC_UNKNOWN_FUNCTION;
}
// RPC:hal:UNMAP_MEMORY_DMA - TU10X, GA100, GA102, GA103, GA104, GA106, GA107, AD102, AD103, AD104, AD106, AD107, GH10X
NV_STATUS rpcUnmapMemoryDma_STUB(
POBJGPU pGpu,

View File

@ -1303,17 +1303,6 @@ static void __nvoc_init_funcTable_KernelBus_1(KernelBus *pThis, RmHalspecOwner *
pThis->__kbusGetEccCounts__ = &kbusGetEccCounts_4a4dee;
}
// Hal function -- kbusClearEccCounts
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_GH100;
}
// default
else
{
pThis->__kbusClearEccCounts__ = &kbusClearEccCounts_b3696a;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBus_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStatePreInitLocked__ = &__nvoc_thunk_KernelBus_engstateStatePreInitLocked;

View File

@ -428,7 +428,6 @@ struct KernelBus {
void (*__kbusTeardownCoherentCpuMapping__)(struct OBJGPU *, struct KernelBus *, NvBool);
NV_STATUS (*__kbusBar1InstBlkBind__)(struct OBJGPU *, struct KernelBus *);
NvU32 (*__kbusGetEccCounts__)(struct OBJGPU *, struct KernelBus *);
void (*__kbusClearEccCounts__)(struct OBJGPU *, struct KernelBus *);
NV_STATUS (*__kbusStateInitUnlocked__)(POBJGPU, struct KernelBus *);
void (*__kbusInitMissing__)(POBJGPU, struct KernelBus *);
NV_STATUS (*__kbusStatePreInitUnlocked__)(POBJGPU, struct KernelBus *);
@ -730,8 +729,6 @@ NV_STATUS __nvoc_objCreate_KernelBus(KernelBus**, Dynamic*, NvU32);
#define kbusBar1InstBlkBind_HAL(pGpu, pKernelBus) kbusBar1InstBlkBind_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusGetEccCounts_HAL(pGpu, pKernelBus) kbusGetEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusClearEccCounts_HAL(pGpu, pKernelBus) kbusClearEccCounts_DISPATCH(pGpu, pKernelBus)
#define kbusStateInitUnlocked(pGpu, pEngstate) kbusStateInitUnlocked_DISPATCH(pGpu, pEngstate)
#define kbusInitMissing(pGpu, pEngstate) kbusInitMissing_DISPATCH(pGpu, pEngstate)
#define kbusStatePreInitUnlocked(pGpu, pEngstate) kbusStatePreInitUnlocked_DISPATCH(pGpu, pEngstate)
@ -2531,16 +2528,6 @@ static inline NvU32 kbusGetEccCounts_DISPATCH(struct OBJGPU *pGpu, struct Kernel
return pKernelBus->__kbusGetEccCounts__(pGpu, pKernelBus);
}
void kbusClearEccCounts_GH100(struct OBJGPU *pGpu, struct KernelBus *pKernelBus);
static inline void kbusClearEccCounts_b3696a(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
return;
}
static inline void kbusClearEccCounts_DISPATCH(struct OBJGPU *pGpu, struct KernelBus *pKernelBus) {
pKernelBus->__kbusClearEccCounts__(pGpu, pKernelBus);
}
static inline NV_STATUS kbusStateInitUnlocked_DISPATCH(POBJGPU pGpu, struct KernelBus *pEngstate) {
return pEngstate->__kbusStateInitUnlocked__(pGpu, pEngstate);
}
@ -2625,6 +2612,10 @@ static inline NvBool kbusIsBarAccessBlocked(struct KernelBus *pKernelBus) {
return pKernelBus->bBarAccessBlocked;
}
static inline void kbusSetFlaSupported(struct KernelBus *pKernelBus, NvBool bSupported) {
pKernelBus->bFlaSupported = bSupported;
}
void kbusDestruct_IMPL(struct KernelBus *pKernelBus);
#define __nvoc_kbusDestruct(pKernelBus) kbusDestruct_IMPL(pKernelBus)
@ -2719,6 +2710,9 @@ static inline NV_STATUS kbusIsGpuP2pAlive(struct OBJGPU *pGpu, struct KernelBus
#define kbusIsGpuP2pAlive(pGpu, pKernelBus) kbusIsGpuP2pAlive_IMPL(pGpu, pKernelBus)
#endif //__nvoc_kern_bus_h_disabled
NV_STATUS kbusUpdateRusdStatistics_IMPL(struct OBJGPU *pGpu);
#define kbusUpdateRusdStatistics(pGpu) kbusUpdateRusdStatistics_IMPL(pGpu)
void kbusDetermineBar1Force64KBMapping_IMPL(struct KernelBus *pKernelBus);
#ifdef __nvoc_kern_bus_h_disabled
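
Two small helpers appear in this hunk; below is a usage sketch with assumed call sites. "RUSD" is read here as the RM user shared data surfaced elsewhere in this commit via gpuCreateRusdMemory()/gpuDestroyRusdMemory(), which is an inference rather than something the diff states.

```c
/* Illustrative only: record FLA support on the KernelBus object, then
 * refresh the RUSD bus statistics for the GPU. */
kbusSetFlaSupported(pKernelBus, NV_TRUE);

NV_STATUS status = kbusUpdateRusdStatistics(pGpu);
if (status != NV_OK)
{
    /* Hypothetical handling; real call sites are not shown in this diff. */
}
```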

Some files were not shown because too many files have changed in this diff.