565.57.01
This commit is contained in:
parent
ed4be64962
commit
d5a0858f90
13
README.md
13
README.md
@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 560.35.03.
|
||||
version 565.57.01.
|
||||
|
||||
|
||||
## How to Build
|
||||
@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
560.35.03 driver release. This can be achieved by installing
|
||||
565.57.01 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@ -185,7 +185,7 @@ table below).
|
||||
For details on feature support and limitations, see the NVIDIA GPU driver
|
||||
end user README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/560.35.03/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/565.57.01/README/kernel_open.html
|
||||
|
||||
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
|
||||
Package for more details.
|
||||
@ -199,6 +199,7 @@ Subsystem Device ID.
|
||||
| NVIDIA TITAN RTX | 1E02 |
|
||||
| NVIDIA GeForce RTX 2080 Ti | 1E04 |
|
||||
| NVIDIA GeForce RTX 2080 Ti | 1E07 |
|
||||
| NVIDIA CMP 50HX | 1E09 |
|
||||
| Quadro RTX 6000 | 1E30 |
|
||||
| Quadro RTX 8000 | 1E30 1028 129E |
|
||||
| Quadro RTX 8000 | 1E30 103C 129E |
|
||||
@ -391,6 +392,7 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 2070 | 1F07 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F08 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F0A |
|
||||
| NVIDIA CMP 40HX | 1F0B |
|
||||
| NVIDIA GeForce RTX 2070 | 1F10 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 132D |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 1342 |
|
||||
@ -691,6 +693,7 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
| NVIDIA GeForce GTX 1650 | 2188 |
|
||||
| NVIDIA CMP 30HX | 2189 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2191 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
|
||||
@ -758,9 +761,11 @@ Subsystem Device ID.
|
||||
| NVIDIA H200 | 2335 10DE 18BF |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA H200 NVL | 233B 10DE 1996 |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 16EB |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 1805 |
|
||||
| NVIDIA GH200 480GB | 2342 10DE 1809 |
|
||||
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
@ -831,12 +836,10 @@ Subsystem Device ID.
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA RTX A1000 | 25B0 1028 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
|
||||
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
|
||||
| NVIDIA RTX A400 | 25B2 1028 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 8D95 |
|
||||
| NVIDIA RTX A400 | 25B2 10DE 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 17AA 1879 |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
|
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"560.35.03\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"565.57.01\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2016-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -43,6 +43,8 @@ struct nv_kthread_q
|
||||
atomic_t main_loop_should_exit;
|
||||
|
||||
struct task_struct *q_kthread;
|
||||
|
||||
bool is_unload_flush_ongoing;
|
||||
};
|
||||
|
||||
struct nv_kthread_q_item
|
||||
|
@ -724,6 +724,7 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_GET_OFFSET_IN_PAGE(phys_page) offset_in_page(phys_page)
|
||||
#define NV_GET_PAGE_STRUCT(phys_page) virt_to_page(__va(phys_page))
|
||||
#define NV_VMA_PGOFF(vma) ((vma)->vm_pgoff)
|
||||
#define NV_VMA_SIZE(vma) ((vma)->vm_end - (vma)->vm_start)
|
||||
@ -951,14 +952,14 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
|
||||
NvU64 phys_addr, NvU64 size, NvU32 extra_prot, NvU64 start)
|
||||
{
|
||||
int ret = -1;
|
||||
#if !defined(NV_XEN_SUPPORT_FULLY_VIRTUALIZED_KERNEL)
|
||||
ret = nv_remap_page_range(vma, vma->vm_start, phys_addr, size,
|
||||
ret = nv_remap_page_range(vma, start, phys_addr, size,
|
||||
nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
|
||||
#else
|
||||
ret = io_remap_pfn_range(vma, vma->vm_start, (phys_addr >> PAGE_SHIFT),
|
||||
ret = io_remap_pfn_range(vma, start, (phys_addr >> PAGE_SHIFT),
|
||||
size, nv_adjust_pgprot(vma->vm_page_prot, extra_prot));
|
||||
#endif
|
||||
return ret;
|
||||
@ -1207,6 +1208,7 @@ typedef struct nv_alloc_s {
|
||||
NvBool physical : 1;
|
||||
NvBool unencrypted : 1;
|
||||
NvBool coherent : 1;
|
||||
NvBool carveout : 1;
|
||||
} flags;
|
||||
unsigned int cache_type;
|
||||
unsigned int num_pages;
|
||||
@ -1840,20 +1842,6 @@ static inline int nv_is_control_device(struct inode *inode)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline NvU64 nv_pci_bus_address(struct pci_dev *dev, NvU8 bar_index)
|
||||
{
|
||||
NvU64 bus_addr = 0;
|
||||
#if defined(NV_PCI_BUS_ADDRESS_PRESENT)
|
||||
bus_addr = pci_bus_address(dev, bar_index);
|
||||
#elif defined(CONFIG_PCI)
|
||||
struct pci_bus_region region;
|
||||
|
||||
pcibios_resource_to_bus(dev, ®ion, &dev->resource[bar_index]);
|
||||
bus_addr = region.start;
|
||||
#endif
|
||||
return bus_addr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrements the usage count of the allocation, and moves the allocation to
|
||||
* the given nvlfp's free list if the usage count drops to zero.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -59,6 +59,8 @@ NV_STATUS nv_uvm_resume (void);
|
||||
void nv_uvm_notify_start_device (const NvU8 *uuid);
|
||||
void nv_uvm_notify_stop_device (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_event_interrupt (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_drain_P2P (const NvU8 *uuid);
|
||||
NV_STATUS nv_uvm_resume_P2P (const NvU8 *uuid);
|
||||
|
||||
/* Move these to nv.h once implemented by other UNIX platforms */
|
||||
NvBool nvidia_get_gpuid_list (NvU32 *gpu_ids, NvU32 *gpu_count);
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <nv-ioctl.h>
|
||||
#include <nv-ioctl-numa.h>
|
||||
#include <nvmisc.h>
|
||||
#include <os/nv_memory_area.h>
|
||||
|
||||
extern nv_cap_t *nvidia_caps_root;
|
||||
|
||||
@ -279,8 +280,7 @@ typedef struct nv_usermap_access_params_s
|
||||
NvU64 offset;
|
||||
NvU64 *page_array;
|
||||
NvU64 num_pages;
|
||||
NvU64 mmap_start;
|
||||
NvU64 mmap_size;
|
||||
MemoryArea memArea;
|
||||
NvU64 access_start;
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
@ -296,8 +296,7 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
NvU64 page_index;
|
||||
NvU64 *page_array;
|
||||
NvU64 num_pages;
|
||||
NvU64 mmap_start;
|
||||
NvU64 mmap_size;
|
||||
MemoryArea memArea;
|
||||
NvU64 access_start;
|
||||
NvU64 access_size;
|
||||
NvU64 remap_prot_extra;
|
||||
@ -330,7 +329,7 @@ typedef struct nv_soc_irq_info_s {
|
||||
NvS32 ref_count;
|
||||
} nv_soc_irq_info_t;
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
#define NV_MAX_SOC_IRQS 10
|
||||
#define NV_MAX_DPAUX_NUM_DEVICES 4
|
||||
|
||||
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2
|
||||
@ -535,6 +534,7 @@ typedef struct UvmGpuAddressSpaceInfo_tag *nvgpuAddressSpaceInfo_t;
|
||||
typedef struct UvmGpuAllocInfo_tag *nvgpuAllocInfo_t;
|
||||
typedef struct UvmGpuP2PCapsParams_tag *nvgpuP2PCapsParams_t;
|
||||
typedef struct UvmGpuFbInfo_tag *nvgpuFbInfo_t;
|
||||
typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
|
||||
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
|
||||
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
|
||||
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
|
||||
@ -545,6 +545,7 @@ typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t
|
||||
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
|
||||
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
|
||||
typedef struct UvmGpuExternalMappingInfo_tag *nvgpuExternalMappingInfo_t;
|
||||
typedef struct UvmGpuExternalPhysAddrInfo_tag *nvgpuExternalPhysAddrInfo_t;
|
||||
typedef struct UvmGpuChannelResourceInfo_tag *nvgpuChannelResourceInfo_t;
|
||||
typedef struct UvmGpuChannelInstanceInfo_tag *nvgpuChannelInstanceInfo_t;
|
||||
typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindParams_t;
|
||||
@ -783,7 +784,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU64, NvU32, NvU32, NvU64, NvU64 *, NvBool, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvU64, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
@ -904,6 +905,9 @@ void NV_API_CALL nv_dma_release_dma_buf (nv_dma_buf_t *);
|
||||
|
||||
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_schedule_uvm_drain_p2p (NvU8 *);
|
||||
void NV_API_CALL nv_schedule_uvm_resume_p2p (NvU8 *);
|
||||
|
||||
NvBool NV_API_CALL nv_platform_supports_s0ix (void);
|
||||
NvBool NV_API_CALL nv_s2idle_pm_configured (void);
|
||||
|
||||
@ -1001,8 +1005,8 @@ NV_STATUS NV_API_CALL rm_p2p_put_pages_persistent(nvidia_stack_t *, void *, vo
|
||||
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages (nvidia_stack_t *, nv_dma_device_t *, NvU8 *, NvU64, NvU32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_dup_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, NvHandle, void *, NvHandle, NvU64, NvU64, NvHandle *, void **);
|
||||
void NV_API_CALL rm_dma_buf_undup_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, NvU64, void *, nv_phys_addr_range_t **, NvU32 *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvU64, nv_phys_addr_range_t **, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_map_mem_handle (nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, MemoryRange, void *, NvBool, MemoryArea *);
|
||||
void NV_API_CALL rm_dma_buf_unmap_mem_handle(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, void *, NvBool, MemoryArea);
|
||||
NV_STATUS NV_API_CALL rm_dma_buf_get_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle *, NvHandle *, NvHandle *, void **, NvBool *);
|
||||
void NV_API_CALL rm_dma_buf_put_client_and_device(nvidia_stack_t *, nv_state_t *, NvHandle, NvHandle, NvHandle, void *);
|
||||
NV_STATUS NV_API_CALL rm_log_gpu_crash (nv_stack_t *, nv_state_t *);
|
||||
|
@ -1085,6 +1085,22 @@ NV_STATUS nvUvmInterfaceRegisterUvmCallbacks(struct UvmOpsUvmEvents *importedUvm
|
||||
//
|
||||
void nvUvmInterfaceDeRegisterUvmOps(void);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceGetNvlinkInfo
|
||||
|
||||
Gets NVLINK information from RM.
|
||||
|
||||
Arguments:
|
||||
device[IN] - GPU device handle
|
||||
nvlinkInfo [OUT] - Pointer to NvlinkInfo structure
|
||||
|
||||
Error codes:
|
||||
NV_ERROR
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetNvlinkInfo(uvmGpuDeviceHandle device,
|
||||
UvmGpuNvlinkInfo *nvlinkInfo);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceP2pObjectCreate
|
||||
|
||||
@ -1161,6 +1177,48 @@ NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvU64 size,
|
||||
UvmGpuExternalMappingInfo *gpuExternalMappingInfo);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceGetExternalAllocPhysAddrs
|
||||
|
||||
The interface builds the RM physical addrs using the provided input parameters.
|
||||
|
||||
Arguments:
|
||||
vaSpace[IN] - vaSpace handle.
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PhysAddrs
|
||||
should be built.
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
gpuExternalMappingInfo[IN/OUT] - See nv_uvm_types.h for more information.
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
|
||||
NV_ERR_INVALID_OBJECT_HANDLE - Invalid memory handle is passed.
|
||||
NV_ERR_NOT_SUPPORTED - Functionality is not supported (see comments in nv_gpu_ops.c)
|
||||
NV_ERR_INVALID_BASE - offset is beyond the allocation size
|
||||
NV_ERR_INVALID_LIMIT - (offset + size) is beyond the allocation size.
|
||||
NV_ERR_BUFFER_TOO_SMALL - gpuExternalMappingInfo.physAddrBufferSize is insufficient to
|
||||
store single physAddr.
|
||||
NV_ERR_NOT_READY - Returned when querying the physAddrs requires a deferred setup
|
||||
which has not yet completed. It is expected that the caller
|
||||
will reattempt the call until a different code is returned.
|
||||
As an example, multi-node systems which require querying
|
||||
physAddrs from the Fabric Manager may return this code.
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceGetExternalAllocPhysAddrs(uvmGpuAddressSpaceHandle vaSpace,
|
||||
NvHandle hMemory,
|
||||
NvU64 offset,
|
||||
NvU64 size,
|
||||
UvmGpuExternalPhysAddrInfo *gpuExternalPhysAddrsInfo);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceRetainChannel
|
||||
|
||||
@ -1462,6 +1520,16 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
|
||||
char *methodStream,
|
||||
NvU32 methodStreamSize);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceReportFatalError
|
||||
|
||||
Reports a global fatal error so RM can inform the clients that a node reboot
|
||||
is necessary to recover from this error. This function can be called from
|
||||
any lock environment, bottom half or non-interrupt context.
|
||||
|
||||
*/
|
||||
void nvUvmInterfaceReportFatalError(NV_STATUS error);
|
||||
|
||||
/*******************************************************************************
|
||||
Cryptography Services Library (CSL) Interface
|
||||
*/
|
||||
|
@ -543,6 +543,36 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
NvU32 pteSize;
|
||||
} UvmGpuExternalMappingInfo;
|
||||
|
||||
typedef struct UvmGpuExternalPhysAddrInfo_tag
|
||||
{
|
||||
// In: Virtual permissions. Returns
|
||||
// NV_ERR_INVALID_ACCESS_TYPE if input is
|
||||
// inaccurate
|
||||
UvmRmGpuMappingType mappingType;
|
||||
|
||||
// In: Size of the buffer to store PhysAddrs (in bytes).
|
||||
NvU64 physAddrBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PhysAddrs.
|
||||
// Out: The interface will fill the buffer with PhysAddrs
|
||||
NvU64 *physAddrBuffer;
|
||||
|
||||
// Out: Number of PhysAddrs filled in to the buffer.
|
||||
NvU64 numWrittenPhysAddrs;
|
||||
|
||||
// Out: Number of PhysAddrs remaining to be filled
|
||||
// if the buffer is not sufficient to accommodate
|
||||
// requested PhysAddrs.
|
||||
NvU64 numRemainingPhysAddrs;
|
||||
} UvmGpuExternalPhysAddrInfo;
|
||||
|
||||
typedef struct UvmGpuP2PCapsParams_tag
|
||||
{
|
||||
// Out: peerId[i] contains gpu[i]'s peer id of gpu[1 - i]. Only defined if
|
||||
@ -660,6 +690,9 @@ typedef struct UvmGpuInfo_tag
|
||||
// Maximum number of TPCs per GPC
|
||||
NvU32 maxTpcPerGpcCount;
|
||||
|
||||
// Number of access counter buffers.
|
||||
NvU32 accessCntrBufferCount;
|
||||
|
||||
// NV_TRUE if SMC is enabled on this GPU.
|
||||
NvBool smcEnabled;
|
||||
|
||||
@ -721,10 +754,12 @@ typedef struct UvmGpuFbInfo_tag
|
||||
// RM regions that are not registered with PMA either.
|
||||
NvU64 maxAllocatableAddress;
|
||||
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
NvU32 heapSize; // RAM in KB available for user allocations
|
||||
NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation
|
||||
NvBool bZeroFb; // Zero FB mode enabled.
|
||||
NvU64 maxVidmemPageSize; // Largest GPU page size to access vidmem.
|
||||
NvBool bStaticBar1Enabled; // Static BAR1 mode is enabled
|
||||
NvU64 staticBar1Size; // The size of the static mapping
|
||||
} UvmGpuFbInfo;
|
||||
|
||||
typedef struct UvmGpuEccInfo_tag
|
||||
@ -736,6 +771,15 @@ typedef struct UvmGpuEccInfo_tag
|
||||
NvBool bEccEnabled;
|
||||
} UvmGpuEccInfo;
|
||||
|
||||
typedef struct UvmGpuNvlinkInfo_tag
|
||||
{
|
||||
unsigned nvlinkMask;
|
||||
unsigned nvlinkOffset;
|
||||
void *nvlinkReadLocation;
|
||||
NvBool *nvlinkErrorNotifier;
|
||||
NvBool bNvlinkRecoveryEnabled;
|
||||
} UvmGpuNvlinkInfo;
|
||||
|
||||
typedef struct UvmPmaAllocationOptions_tag
|
||||
{
|
||||
NvU32 flags;
|
||||
@ -852,6 +896,41 @@ typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct
|
||||
typedef void (*uvmEventIsrTopHalf_t) (void);
|
||||
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
uvmEventDrainP2P
|
||||
This function will be called by the GPU driver to signal to UVM that the
|
||||
GPU has encountered an uncontained error, and all peer work must be drained
|
||||
to recover. When it is called, the following assumptions/guarantees are
|
||||
valid/made:
|
||||
|
||||
* Impacted user channels have been preempted and disabled
|
||||
* UVM channels are still running normally and will continue to do
|
||||
so unless an unrecoverable error is hit on said channels
|
||||
* UVM must not return from this function until all enqueued work on
|
||||
* peer channels has drained
|
||||
* In the context of this function call, RM will still service faults
|
||||
* UVM must prevent new peer work from being enqueued until the
|
||||
uvmEventResumeP2P callback is issued
|
||||
|
||||
Returns:
|
||||
NV_OK if UVM has idled peer work and will prevent new peer workloads.
|
||||
NV_ERR_TIMEOUT if peer work was unable to be drained within a timeout
|
||||
XXX NV_ERR_* for any other failure (TBD)
|
||||
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventDrainP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
/*******************************************************************************
|
||||
uvmEventResumeP2P
|
||||
This function will be called by the GPU driver to signal to UVM that the
|
||||
GPU has recovered from the previously reported uncontained NVLINK error.
|
||||
When it is called, the following assumptions/guarantees are valid/made:
|
||||
|
||||
* UVM is again allowed to enqueue peer work
|
||||
* UVM channels are still running normally
|
||||
*/
|
||||
typedef NV_STATUS (*uvmEventResumeP2P_t) (const NvProcessorUuid *pGpuUuidStruct);
|
||||
|
||||
struct UvmOpsUvmEvents
|
||||
{
|
||||
uvmEventSuspend_t suspend;
|
||||
@ -864,6 +943,8 @@ struct UvmOpsUvmEvents
|
||||
uvmEventWddmRestartAfterTimeout_t wddmRestartAfterTimeout;
|
||||
uvmEventServiceInterrupt_t serviceInterrupt;
|
||||
#endif
|
||||
uvmEventDrainP2P_t drainP2P;
|
||||
uvmEventResumeP2P_t resumeP2P;
|
||||
};
|
||||
|
||||
#define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32
|
||||
@ -1071,11 +1152,13 @@ typedef UvmGpuAccessCntrConfig gpuAccessCntrConfig;
|
||||
typedef UvmGpuFaultInfo gpuFaultInfo;
|
||||
typedef UvmGpuMemoryInfo gpuMemoryInfo;
|
||||
typedef UvmGpuExternalMappingInfo gpuExternalMappingInfo;
|
||||
typedef UvmGpuExternalPhysAddrInfo gpuExternalPhysAddrInfo;
|
||||
typedef UvmGpuChannelResourceInfo gpuChannelResourceInfo;
|
||||
typedef UvmGpuChannelInstanceInfo gpuChannelInstanceInfo;
|
||||
typedef UvmGpuChannelResourceBindParams gpuChannelResourceBindParams;
|
||||
typedef UvmGpuFbInfo gpuFbInfo;
|
||||
typedef UvmGpuEccInfo gpuEccInfo;
|
||||
typedef UvmGpuNvlinkInfo gpuNvlinkInfo;
|
||||
typedef UvmGpuPagingChannel *gpuPagingChannelHandle;
|
||||
typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
|
||||
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
|
||||
|
@ -50,6 +50,8 @@
|
||||
#define NVKMS_LOG2_LUT_ARRAY_SIZE 10
|
||||
#define NVKMS_LUT_ARRAY_SIZE (1 << NVKMS_LOG2_LUT_ARRAY_SIZE)
|
||||
|
||||
#define NVKMS_OLUT_FP_NORM_SCALE_DEFAULT 0xffffffff
|
||||
|
||||
typedef NvU32 NvKmsDeviceHandle;
|
||||
typedef NvU32 NvKmsDispHandle;
|
||||
typedef NvU32 NvKmsConnectorHandle;
|
||||
@ -245,6 +247,80 @@ struct NvKmsLutRamps {
|
||||
NvU16 blue[NVKMS_LUT_ARRAY_SIZE]; /*! in */
|
||||
};
|
||||
|
||||
/* Datatypes for LUT capabilities */
|
||||
enum NvKmsLUTFormat {
|
||||
/*
|
||||
* Normalized fixed-point format mapping [0, 1] to [0x0, 0xFFFF].
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_UNORM16,
|
||||
|
||||
/*
|
||||
* Half-precision floating point.
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_FP16,
|
||||
|
||||
/*
|
||||
* 14-bit fixed-point format required to work around hardware bug 813188.
|
||||
*
|
||||
* To convert from UNORM16 to UNORM14_WAR_813188:
|
||||
* unorm14_war_813188 = ((unorm16 >> 2) & ~7) + 0x6000
|
||||
*/
|
||||
NVKMS_LUT_FORMAT_UNORM14_WAR_813188
|
||||
};
|
||||
|
||||
enum NvKmsLUTVssSupport {
|
||||
NVKMS_LUT_VSS_NOT_SUPPORTED,
|
||||
NVKMS_LUT_VSS_SUPPORTED,
|
||||
NVKMS_LUT_VSS_REQUIRED,
|
||||
};
|
||||
|
||||
enum NvKmsLUTVssType {
|
||||
NVKMS_LUT_VSS_TYPE_NONE,
|
||||
NVKMS_LUT_VSS_TYPE_LINEAR,
|
||||
NVKMS_LUT_VSS_TYPE_LOGARITHMIC,
|
||||
};
|
||||
|
||||
struct NvKmsLUTCaps {
|
||||
/*! Whether this layer or head on this device supports this LUT stage. */
|
||||
NvBool supported;
|
||||
|
||||
/*! Whether this LUT supports VSS. */
|
||||
enum NvKmsLUTVssSupport vssSupport;
|
||||
|
||||
/*!
|
||||
* The type of VSS segmenting this LUT uses.
|
||||
*/
|
||||
enum NvKmsLUTVssType vssType;
|
||||
|
||||
/*!
|
||||
* Expected number of VSS segments.
|
||||
*/
|
||||
NvU32 vssSegments;
|
||||
|
||||
/*!
|
||||
* Expected number of LUT entries.
|
||||
*/
|
||||
NvU32 lutEntries;
|
||||
|
||||
/*!
|
||||
* Format for each of the LUT entries.
|
||||
*/
|
||||
enum NvKmsLUTFormat entryFormat;
|
||||
};
|
||||
|
||||
/* each LUT entry uses this many bytes */
|
||||
#define NVKMS_LUT_CAPS_LUT_ENTRY_SIZE (4 * sizeof(NvU16))
|
||||
|
||||
/* if the LUT surface uses VSS, size of the VSS header */
|
||||
#define NVKMS_LUT_VSS_HEADER_SIZE (4 * NVKMS_LUT_CAPS_LUT_ENTRY_SIZE)
|
||||
|
||||
struct NvKmsLUTSurfaceParams {
|
||||
NvKmsSurfaceHandle surfaceHandle;
|
||||
NvU64 offset NV_ALIGN_BYTES(8);
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
};
|
||||
|
||||
/*
|
||||
* A 3x4 row-major colorspace conversion matrix.
|
||||
*
|
||||
@ -463,6 +539,10 @@ struct NvKmsLayerCapabilities {
|
||||
* still expected to honor the NvKmsUsageBounds for each head.
|
||||
*/
|
||||
NvU64 supportedSurfaceMemoryFormats NV_ALIGN_BYTES(8);
|
||||
|
||||
/* Capabilities for each LUT stage in the EVO3 precomp pipeline. */
|
||||
struct NvKmsLUTCaps ilut;
|
||||
struct NvKmsLUTCaps tmo;
|
||||
};
|
||||
|
||||
/*!
|
||||
@ -683,4 +763,20 @@ struct NvKmsSuperframeInfo {
|
||||
} view[NVKMS_MAX_SUPERFRAME_VIEWS];
|
||||
};
|
||||
|
||||
/* Fields within NvKmsVblankSemControlDataOneHead::flags */
|
||||
#define NVKMS_VBLANK_SEM_CONTROL_SWAP_INTERVAL 15:0
|
||||
|
||||
struct NvKmsVblankSemControlDataOneHead {
|
||||
NvU32 requestCounterAccel;
|
||||
NvU32 requestCounter;
|
||||
NvU32 flags;
|
||||
|
||||
NvU32 semaphore;
|
||||
NvU64 vblankCount NV_ALIGN_BYTES(8);
|
||||
};
|
||||
|
||||
struct NvKmsVblankSemControlData {
|
||||
struct NvKmsVblankSemControlDataOneHead head[NV_MAX_HEADS];
|
||||
};
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
@ -124,6 +124,14 @@ struct NvKmsKapiDisplayMode {
|
||||
#define NVKMS_KAPI_LAYER_INVALID_IDX 0xff
|
||||
#define NVKMS_KAPI_LAYER_PRIMARY_IDX 0
|
||||
|
||||
struct NvKmsKapiLutCaps {
|
||||
struct {
|
||||
struct NvKmsLUTCaps ilut;
|
||||
struct NvKmsLUTCaps tmo;
|
||||
} layer[NVKMS_KAPI_LAYER_MAX];
|
||||
struct NvKmsLUTCaps olut;
|
||||
};
|
||||
|
||||
struct NvKmsKapiDeviceResourcesInfo {
|
||||
|
||||
NvU32 numHeads;
|
||||
@ -169,6 +177,8 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsICtCp[NVKMS_KAPI_LAYER_MAX];
|
||||
|
||||
struct NvKmsKapiLutCaps lutCaps;
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@ -262,21 +272,54 @@ struct NvKmsKapiLayerConfig {
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} ilut;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} tmo;
|
||||
|
||||
struct NvKmsCscMatrix csc;
|
||||
NvBool cscUseMain;
|
||||
|
||||
struct {
|
||||
struct NvKmsCscMatrix lmsCtm;
|
||||
struct NvKmsCscMatrix lmsToItpCtm;
|
||||
struct NvKmsCscMatrix itpToLmsCtm;
|
||||
struct NvKmsCscMatrix blendCtm;
|
||||
struct {
|
||||
NvBool lmsCtm : 1;
|
||||
NvBool lmsToItpCtm : 1;
|
||||
NvBool itpToLmsCtm : 1;
|
||||
NvBool blendCtm : 1;
|
||||
} enabled;
|
||||
} matrixOverrides;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
struct NvKmsKapiLayerConfig config;
|
||||
struct {
|
||||
NvBool surfaceChanged : 1;
|
||||
NvBool srcXYChanged : 1;
|
||||
NvBool srcWHChanged : 1;
|
||||
NvBool dstXYChanged : 1;
|
||||
NvBool dstWHChanged : 1;
|
||||
NvBool cscChanged : 1;
|
||||
NvBool tfChanged : 1;
|
||||
NvBool hdrMetadataChanged : 1;
|
||||
NvBool surfaceChanged : 1;
|
||||
NvBool srcXYChanged : 1;
|
||||
NvBool srcWHChanged : 1;
|
||||
NvBool dstXYChanged : 1;
|
||||
NvBool dstWHChanged : 1;
|
||||
NvBool cscChanged : 1;
|
||||
NvBool tfChanged : 1;
|
||||
NvBool hdrMetadataChanged : 1;
|
||||
NvBool matrixOverridesChanged : 1;
|
||||
NvBool ilutChanged : 1;
|
||||
NvBool tmoChanged : 1;
|
||||
} flags;
|
||||
};
|
||||
|
||||
@ -342,18 +385,30 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
struct NvKmsLutRamps *pRamps;
|
||||
} output;
|
||||
} lut;
|
||||
|
||||
struct {
|
||||
NvBool enabled;
|
||||
struct NvKmsKapiSurface *lutSurface;
|
||||
NvU64 offset;
|
||||
NvU32 vssSegments;
|
||||
NvU32 lutEntries;
|
||||
} olut;
|
||||
|
||||
NvU32 olutFpNormScale;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
struct NvKmsKapiHeadModeSetConfig modeSetConfig;
|
||||
struct {
|
||||
NvBool activeChanged : 1;
|
||||
NvBool displaysChanged : 1;
|
||||
NvBool modeChanged : 1;
|
||||
NvBool hdrInfoFrameChanged : 1;
|
||||
NvBool colorimetryChanged : 1;
|
||||
NvBool ilutChanged : 1;
|
||||
NvBool olutChanged : 1;
|
||||
NvBool activeChanged : 1;
|
||||
NvBool displaysChanged : 1;
|
||||
NvBool modeChanged : 1;
|
||||
NvBool hdrInfoFrameChanged : 1;
|
||||
NvBool colorimetryChanged : 1;
|
||||
NvBool legacyIlutChanged : 1;
|
||||
NvBool legacyOlutChanged : 1;
|
||||
NvBool olutChanged : 1;
|
||||
NvBool olutFpNormScaleChanged : 1;
|
||||
} flags;
|
||||
|
||||
struct NvKmsKapiCursorRequestedConfig cursorRequestedConfig;
|
||||
@ -1172,21 +1227,6 @@ struct NvKmsKapiFunctionsTable {
|
||||
NvU64 *pPages
|
||||
);
|
||||
|
||||
/*!
|
||||
* Check if this memory object can be scanned out for display.
|
||||
*
|
||||
* \param [in] device A device allocated using allocateDevice().
|
||||
*
|
||||
* \param [in] memory The memory object to check for display support.
|
||||
*
|
||||
* \return NV_TRUE if this memory can be displayed, NV_FALSE if not.
|
||||
*/
|
||||
NvBool (*isMemoryValidForDisplay)
|
||||
(
|
||||
const struct NvKmsKapiDevice *device,
|
||||
const struct NvKmsKapiMemory *memory
|
||||
);
|
||||
|
||||
/*
|
||||
* Import SGT as a memory handle.
|
||||
*
|
||||
@ -1504,6 +1544,16 @@ struct NvKmsKapiFunctionsTable {
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvS32 index
|
||||
);
|
||||
|
||||
/*
|
||||
* Notify NVKMS that the system's framebuffer console has been disabled and
|
||||
* the reserved allocation for the old framebuffer console can be unmapped.
|
||||
*/
|
||||
void
|
||||
(*framebufferConsoleDisabled)
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
);
|
||||
};
|
||||
|
||||
/** @} */
|
||||
@ -1518,6 +1568,20 @@ NvBool nvKmsKapiGetFunctionsTable
|
||||
struct NvKmsKapiFunctionsTable *funcsTable
|
||||
);
|
||||
|
||||
NvU32 nvKmsKapiF16ToF32(NvU16 a);
|
||||
|
||||
NvU16 nvKmsKapiF32ToF16(NvU32 a);
|
||||
|
||||
NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact);
|
||||
|
||||
NvU32 nvKmsKapiUI32ToF32(NvU32 a);
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif /* defined(__NVKMS_KAPI_H__) */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2017-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -34,19 +34,25 @@
|
||||
/*
|
||||
* This is the maximum number of GPUs supported in a single system.
|
||||
*/
|
||||
#define NV_MAX_DEVICES 32
|
||||
#define NV_MAX_DEVICES 32
|
||||
|
||||
/*
|
||||
* This is the maximum number of subdevices within a single device.
|
||||
*/
|
||||
#define NV_MAX_SUBDEVICES 8
|
||||
#define NV_MAX_SUBDEVICES 8
|
||||
|
||||
/*
|
||||
* This is the maximum length of the process name string.
|
||||
*/
|
||||
#define NV_PROC_NAME_MAX_LENGTH 100U
|
||||
#define NV_PROC_NAME_MAX_LENGTH 100U
|
||||
|
||||
/*
|
||||
* This is the maximum number of heads per GPU.
|
||||
*/
|
||||
#define NV_MAX_HEADS 4
|
||||
#define NV_MAX_HEADS 4
|
||||
|
||||
/*
|
||||
* Maximum length of a MIG device UUID. It is a 36-byte UUID string plus a
|
||||
* 4-byte prefix and NUL terminator: 'M' 'I' 'G' '-' UUID '\0x0'
|
||||
*/
|
||||
#define NV_MIG_DEVICE_UUID_STR_LENGTH 41U
|
||||
|
@ -721,6 +721,42 @@ nvPrevPow2_U64(const NvU64 x )
|
||||
} \
|
||||
}
|
||||
|
||||
//
|
||||
// Bug 4851259: Newly added functions must be hidden from certain HS-signed
|
||||
// ucode compilers to avoid signature mismatch.
|
||||
//
|
||||
#ifndef NVDEC_1_0
|
||||
/*!
|
||||
* Returns the position of nth set bit in the given mask.
|
||||
*
|
||||
* Returns -1 if mask has fewer than n bits set.
|
||||
*
|
||||
* n is 0 indexed and has valid values 0..31 inclusive, so "zeroth" set bit is
|
||||
* the first set LSB.
|
||||
*
|
||||
* Example, if mask = 0x000000F0u and n = 1, the return value will be 5.
|
||||
* Example, if mask = 0x000000F0u and n = 4, the return value will be -1.
|
||||
*/
|
||||
static NV_FORCEINLINE NvS32
|
||||
nvGetNthSetBitIndex32(NvU32 mask, NvU32 n)
|
||||
{
|
||||
NvU32 seenSetBitsCount = 0;
|
||||
NvS32 index;
|
||||
FOR_EACH_INDEX_IN_MASK(32, index, mask)
|
||||
{
|
||||
if (seenSetBitsCount == n)
|
||||
{
|
||||
return index;
|
||||
}
|
||||
++seenSetBitsCount;
|
||||
}
|
||||
FOR_EACH_INDEX_IN_MASK_END;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif // NVDEC_1_0
|
||||
|
||||
//
|
||||
// Size to use when declaring variable-sized arrays
|
||||
//
|
||||
|
@ -40,8 +40,11 @@
|
||||
#include "nv_stdarg.h"
|
||||
#include <nv-kernel-interface-api.h>
|
||||
#include <os/nv_memory_type.h>
|
||||
#include <os/nv_memory_area.h>
|
||||
#include <nv-caps.h>
|
||||
|
||||
#include "rs_access.h"
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
@ -102,8 +105,10 @@ NvBool NV_API_CALL os_pci_remove_supported (void);
|
||||
void NV_API_CALL os_pci_remove (void *);
|
||||
void* NV_API_CALL os_map_kernel_space (NvU64, NvU64, NvU32);
|
||||
void NV_API_CALL os_unmap_kernel_space (void *, NvU64);
|
||||
void* NV_API_CALL os_map_user_space (NvU64, NvU64, NvU32, NvU32, void **);
|
||||
#if defined(NV_VMWARE)
|
||||
void* NV_API_CALL os_map_user_space (MemoryArea *, NvU32, NvU32, void **);
|
||||
void NV_API_CALL os_unmap_user_space (void *, NvU64, void *);
|
||||
#endif
|
||||
NV_STATUS NV_API_CALL os_flush_cpu_cache_all (void);
|
||||
NV_STATUS NV_API_CALL os_flush_user_cache (void);
|
||||
void NV_API_CALL os_flush_cpu_write_combine_buffer(void);
|
||||
@ -114,7 +119,7 @@ void NV_API_CALL os_io_write_byte (NvU32, NvU8);
|
||||
void NV_API_CALL os_io_write_word (NvU32, NvU16);
|
||||
void NV_API_CALL os_io_write_dword (NvU32, NvU32);
|
||||
NvBool NV_API_CALL os_is_administrator (void);
|
||||
NvBool NV_API_CALL os_allow_priority_override (void);
|
||||
NvBool NV_API_CALL os_check_access (RsAccessRight accessRight);
|
||||
void NV_API_CALL os_dbg_init (void);
|
||||
void NV_API_CALL os_dbg_breakpoint (void);
|
||||
void NV_API_CALL os_dbg_set_level (NvU32);
|
||||
@ -130,7 +135,8 @@ void NV_API_CALL os_free_spinlock (void *);
|
||||
NvU64 NV_API_CALL os_acquire_spinlock (void *);
|
||||
void NV_API_CALL os_release_spinlock (void *, NvU64);
|
||||
NV_STATUS NV_API_CALL os_queue_work_item (struct os_work_queue *, void *);
|
||||
NV_STATUS NV_API_CALL os_flush_work_queue (struct os_work_queue *);
|
||||
NV_STATUS NV_API_CALL os_flush_work_queue (struct os_work_queue *, NvBool);
|
||||
NvBool NV_API_CALL os_is_queue_flush_ongoing (struct os_work_queue *);
|
||||
NV_STATUS NV_API_CALL os_alloc_mutex (void **);
|
||||
void NV_API_CALL os_free_mutex (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_mutex (void *);
|
||||
@ -219,6 +225,8 @@ extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
extern NvU8 os_page_shift;
|
||||
extern NvBool os_cc_enabled;
|
||||
extern NvBool os_cc_sev_snp_enabled;
|
||||
extern NvBool os_cc_snp_vtom_enabled;
|
||||
extern NvBool os_cc_tdx_enabled;
|
||||
extern NvBool os_dma_buf_enabled;
|
||||
extern NvBool os_imex_channel_is_supported;
|
||||
|
@ -36,4 +36,69 @@ typedef struct MemoryArea
|
||||
NvU64 numRanges;
|
||||
} MemoryArea;
|
||||
|
||||
static inline NvU64 memareaSize(MemoryArea memArea)
|
||||
{
|
||||
NvU64 size = 0;
|
||||
NvU64 idx = 0;
|
||||
for (idx = 0; idx < memArea.numRanges; idx++)
|
||||
{
|
||||
size += memArea.pRanges[idx].size;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
static inline MemoryRange
|
||||
mrangeMake
|
||||
(
|
||||
NvU64 start,
|
||||
NvU64 size
|
||||
)
|
||||
{
|
||||
MemoryRange range;
|
||||
range.start = start;
|
||||
range.size = size;
|
||||
return range;
|
||||
}
|
||||
|
||||
static inline NvU64
|
||||
mrangeLimit
|
||||
(
|
||||
MemoryRange a
|
||||
)
|
||||
{
|
||||
return a.start + a.size;
|
||||
}
|
||||
|
||||
static inline NvBool
|
||||
mrangeIntersects
|
||||
(
|
||||
MemoryRange a,
|
||||
MemoryRange b
|
||||
)
|
||||
{
|
||||
return ((a.start >= b.start) && (a.start < mrangeLimit(b))) ||
|
||||
((b.start >= a.start) && (b.start < mrangeLimit(a)));
|
||||
}
|
||||
|
||||
static inline NvBool
|
||||
mrangeContains
|
||||
(
|
||||
MemoryRange outer,
|
||||
MemoryRange inner
|
||||
)
|
||||
{
|
||||
return (inner.start >= outer.start) && (mrangeLimit(inner) <= mrangeLimit(outer));
|
||||
}
|
||||
|
||||
static inline MemoryRange
|
||||
mrangeOffset
|
||||
(
|
||||
MemoryRange range,
|
||||
NvU64 amt
|
||||
)
|
||||
{
|
||||
range.start += amt;
|
||||
return range;
|
||||
}
|
||||
|
||||
#endif /* NV_MEMORY_AREA_H */
|
||||
|
@ -85,9 +85,11 @@ NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_nvlink_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuNvlinkInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_p2p_object_create(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuDeviceHandle_t, NvHandle *);
|
||||
void NV_API_CALL rm_gpu_ops_p2p_object_destroy(nvidia_stack_t *, nvgpuSessionHandle_t, NvHandle);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_ptes(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalMappingInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_external_alloc_phys_addrs(nvidia_stack_t*, nvgpuAddressSpaceHandle_t, NvHandle, NvU64, NvU64, nvgpuExternalPhysAddrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_retain_channel(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvHandle, NvHandle, void **, nvgpuChannelInstanceInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_bind_channel_resources(nvidia_stack_t *, void *, nvgpuChannelResourceBindParams_t);
|
||||
void NV_API_CALL rm_gpu_ops_release_channel(nvidia_stack_t *, void *);
|
||||
@ -100,6 +102,7 @@ void NV_API_CALL rm_gpu_ops_paging_channel_destroy(nvidia_stack_t *, nvgpu
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t, NvU64 *);
|
||||
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
|
||||
void NV_API_CALL rm_gpu_ops_report_fatal_error(nvidia_stack_t *, NV_STATUS error);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
|
||||
|
276
kernel-open/common/inc/rs_access.h
Normal file
276
kernel-open/common/inc/rs_access.h
Normal file
@ -0,0 +1,276 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <nvtypes.h>
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable:4324)
|
||||
#endif
|
||||
|
||||
//
|
||||
// This file was generated with FINN, an NVIDIA coding tool.
|
||||
// Source file: rs_access.finn
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvmisc.h"
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right definitions */
|
||||
/****************************************************************************/
|
||||
|
||||
//
|
||||
// The meaning of each access right is documented in
|
||||
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
//
|
||||
// RS_ACCESS_COUNT is the number of access rights that have been defined
|
||||
// and are in use. All integers in the range [0, RS_ACCESS_COUNT) should
|
||||
// represent valid access rights.
|
||||
//
|
||||
// When adding a new access right, don't forget to update
|
||||
// 1) The descriptions in the resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
// 2) RS_ACCESS_COUNT, defined below
|
||||
// 3) The declaration of g_rsAccessMetadata in rs_access_rights.c
|
||||
// 4) The list of access rights in drivers/common/chip-config/Chipcontrols.pm
|
||||
// 5) Any relevant access right callbacks
|
||||
//
|
||||
|
||||
#define RS_ACCESS_DUP_OBJECT 0U
|
||||
#define RS_ACCESS_NICE 1U
|
||||
#define RS_ACCESS_DEBUG 2U
|
||||
#define RS_ACCESS_PERFMON 3U
|
||||
#define RS_ACCESS_COUNT 4U
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right data structures */
|
||||
/****************************************************************************/
|
||||
|
||||
/*!
|
||||
* @brief A type that can be used to represent any access right.
|
||||
*/
|
||||
typedef NvU16 RsAccessRight;
|
||||
|
||||
/*!
|
||||
* @brief An internal type used to represent one limb in an access right mask.
|
||||
*/
|
||||
typedef NvU32 RsAccessLimb;
|
||||
#define SDK_RS_ACCESS_LIMB_BITS 32
|
||||
|
||||
/*!
|
||||
* @brief The number of limbs in the RS_ACCESS_MASK struct.
|
||||
*/
|
||||
#define SDK_RS_ACCESS_MAX_LIMBS 1
|
||||
|
||||
/*!
|
||||
* @brief The maximum number of possible access rights supported by the
|
||||
* current data structure definition.
|
||||
*
|
||||
* You probably want RS_ACCESS_COUNT instead, which is the number of actual
|
||||
* access rights defined.
|
||||
*/
|
||||
#define SDK_RS_ACCESS_MAX_COUNT (0x20) /* finn: Evaluated from "(SDK_RS_ACCESS_LIMB_BITS * SDK_RS_ACCESS_MAX_LIMBS)" */
|
||||
|
||||
/**
|
||||
* @brief A struct representing a set of access rights.
|
||||
*
|
||||
* Note that the values of bit positions larger than RS_ACCESS_COUNT is
|
||||
* undefined, and should not be assumed to be 0 (see RS_ACCESS_MASK_FILL).
|
||||
*/
|
||||
typedef struct RS_ACCESS_MASK {
|
||||
RsAccessLimb limbs[SDK_RS_ACCESS_MAX_LIMBS];
|
||||
} RS_ACCESS_MASK;
|
||||
|
||||
/**
|
||||
* @brief A struct representing auxiliary information about each access right.
|
||||
*/
|
||||
typedef struct RS_ACCESS_INFO {
|
||||
NvU32 flags;
|
||||
} RS_ACCESS_INFO;
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Access right macros */
|
||||
/****************************************************************************/
|
||||
|
||||
#define SDK_RS_ACCESS_LIMB_INDEX(index) ((index) / SDK_RS_ACCESS_LIMB_BITS)
|
||||
#define SDK_RS_ACCESS_LIMB_POS(index) ((index) % SDK_RS_ACCESS_LIMB_BITS)
|
||||
|
||||
#define SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) \
|
||||
((pAccessMask)->limbs[SDK_RS_ACCESS_LIMB_INDEX(index)])
|
||||
#define SDK_RS_ACCESS_OFFSET_MASK(index) \
|
||||
NVBIT_TYPE(SDK_RS_ACCESS_LIMB_POS(index), RsAccessLimb)
|
||||
|
||||
/*!
|
||||
* @brief Checks that accessRight represents a valid access right.
|
||||
*
|
||||
* The valid range of access rights is [0, RS_ACCESS_COUNT).
|
||||
*
|
||||
* @param[in] accessRight The access right value to check
|
||||
*
|
||||
* @return true if accessRight is valid
|
||||
* @return false otherwise
|
||||
*/
|
||||
#define RS_ACCESS_BOUNDS_CHECK(accessRight) \
|
||||
(accessRight < RS_ACCESS_COUNT)
|
||||
|
||||
/*!
|
||||
* @brief Test whether an access right is present in a set
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to read
|
||||
* @param[in] index The access right to examine
|
||||
*
|
||||
* @return NV_TRUE if the access right specified by index was present in the set,
|
||||
* and NV_FALSE otherwise
|
||||
*/
|
||||
#define RS_ACCESS_MASK_TEST(pAccessMask, index) \
|
||||
(RS_ACCESS_BOUNDS_CHECK(index) && \
|
||||
(SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) & SDK_RS_ACCESS_OFFSET_MASK(index)) != 0)
|
||||
|
||||
/*!
|
||||
* @brief Add an access right to a mask
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to modify
|
||||
* @param[in] index The access right to set
|
||||
*/
|
||||
#define RS_ACCESS_MASK_ADD(pAccessMask, index) \
|
||||
do \
|
||||
{ \
|
||||
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) |= SDK_RS_ACCESS_OFFSET_MASK(index); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Remove an access right from a mask
|
||||
*
|
||||
* @param[in] pAccessMask The set of access rights to modify
|
||||
* @param[in] index The access right to unset
|
||||
*/
|
||||
#define RS_ACCESS_MASK_REMOVE(pAccessMask, index) \
|
||||
do \
|
||||
{ \
|
||||
if (RS_ACCESS_BOUNDS_CHECK(index)) { \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pAccessMask, index) &= ~SDK_RS_ACCESS_OFFSET_MASK(index); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Performs an in-place union between two access right masks
|
||||
*
|
||||
* @param[in,out] pMaskOut The access rights mask to be updated
|
||||
* @param[in] pMaskIn The set of access rights to be added to pMaskOut
|
||||
*/
|
||||
#define RS_ACCESS_MASK_UNION(pMaskOut, pMaskIn) \
|
||||
do \
|
||||
{ \
|
||||
NvLength limb; \
|
||||
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
|
||||
{ \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) |= SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Performs an in-place subtract of one mask's rights from another
|
||||
*
|
||||
* @param[in,out] pMaskOut The access rights mask to be updated
|
||||
* @param[in] pMaskIn The set of access rights to be removed from pMaskOut
|
||||
*/
|
||||
#define RS_ACCESS_MASK_SUBTRACT(pMaskOut, pMaskIn) \
|
||||
do \
|
||||
{ \
|
||||
NvLength limb; \
|
||||
for (limb = 0; limb < SDK_RS_ACCESS_MAX_LIMBS; limb++) \
|
||||
{ \
|
||||
SDK_RS_ACCESS_LIMB_ELT(pMaskOut, limb) &= ~SDK_RS_ACCESS_LIMB_ELT(pMaskIn, limb); \
|
||||
} \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Removes all rights from an access rights mask
|
||||
*
|
||||
* @param[in,out] pAccessMask The access rights mask to be updated
|
||||
*/
|
||||
#define RS_ACCESS_MASK_CLEAR(pAccessMask) \
|
||||
do \
|
||||
{ \
|
||||
portMemSet(pAccessMask, 0, sizeof(*pAccessMask)); \
|
||||
} while (NV_FALSE)
|
||||
|
||||
/*!
|
||||
* @brief Adds all rights to an access rights mask
|
||||
*
|
||||
* @param[in,out] pAccessMask The access rights mask to be updated
|
||||
*/
|
||||
#define RS_ACCESS_MASK_FILL(pAccessMask) \
|
||||
do \
|
||||
{ \
|
||||
portMemSet(pAccessMask, 0xff, sizeof(*pAccessMask)); \
|
||||
} while (NV_FALSE)
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/* Share definitions */
|
||||
/****************************************************************************/
|
||||
|
||||
//
|
||||
// The usage of Share Policy and the meaning of each share type is documented in
|
||||
// resman/docs/rmapi/resource_server/rm_capabilities.adoc
|
||||
//
|
||||
#define RS_SHARE_TYPE_NONE (0U)
|
||||
#define RS_SHARE_TYPE_ALL (1U)
|
||||
#define RS_SHARE_TYPE_OS_SECURITY_TOKEN (2U)
|
||||
#define RS_SHARE_TYPE_CLIENT (3U)
|
||||
#define RS_SHARE_TYPE_PID (4U)
|
||||
#define RS_SHARE_TYPE_SMC_PARTITION (5U)
|
||||
#define RS_SHARE_TYPE_GPU (6U)
|
||||
#define RS_SHARE_TYPE_FM_CLIENT (7U)
|
||||
// Must be last. Update when a new SHARE_TYPE is added
|
||||
#define RS_SHARE_TYPE_MAX (8U)
|
||||
|
||||
|
||||
//
|
||||
// Use Revoke to remove an existing policy from the list.
|
||||
// Allow is based on OR logic, Require is based on AND logic.
|
||||
// To share a right, at least one Allow (non-Require) must match, and all Require must pass.
|
||||
// If Compose is specified, policies will be added to the list. Otherwise, they will replace the list.
|
||||
//
|
||||
#define RS_SHARE_ACTION_FLAG_REVOKE NVBIT(0)
|
||||
#define RS_SHARE_ACTION_FLAG_REQUIRE NVBIT(1)
|
||||
#define RS_SHARE_ACTION_FLAG_COMPOSE NVBIT(2)
|
||||
|
||||
/****************************************************************************/
|
||||
/* Share flag data structures */
|
||||
/****************************************************************************/
|
||||
|
||||
typedef struct RS_SHARE_POLICY {
|
||||
NvU32 target;
|
||||
RS_ACCESS_MASK accessMask;
|
||||
NvU16 type; ///< RS_SHARE_TYPE_
|
||||
NvU8 action; ///< RS_SHARE_ACTION_
|
||||
} RS_SHARE_POLICY;
|
@ -661,23 +661,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_GET_DOMAIN_BUS_AND_SLOT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pci_bus_address)
|
||||
#
|
||||
# Determine if the pci_bus_address() function is
|
||||
# present.
|
||||
#
|
||||
# Added by commit 06cf56e497c8 ("PCI: Add pci_bus_address() to
|
||||
# get bus address of a BAR") in v3.14
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_pci_bus_address(void) {
|
||||
pci_bus_address();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PCI_BUS_ADDRESS_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
hash__remap_4k_pfn)
|
||||
#
|
||||
# Determine if the hash__remap_4k_pfn() function is
|
||||
@ -1538,23 +1521,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GET_NUM_PHYSPAGES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
backing_dev_info)
|
||||
#
|
||||
# Determine if the 'address_space' structure has
|
||||
# a 'backing_dev_info' field.
|
||||
#
|
||||
# Removed by commit b83ae6d42143 ("fs: remove
|
||||
# mapping->backing_dev_info") in v4.0
|
||||
#
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
int conftest_backing_dev_info(void) {
|
||||
return offsetof(struct address_space, backing_dev_info);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO" "" "types"
|
||||
;;
|
||||
|
||||
xen_ioemu_inject_msi)
|
||||
# Determine if the xen_ioemu_inject_msi() function is present.
|
||||
CODE="
|
||||
@ -2409,45 +2375,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_ATOMIC_HELPER_LEGACY_GAMMA_SET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
wait_on_bit_lock_argument_count)
|
||||
#
|
||||
# Determine how many arguments wait_on_bit_lock takes.
|
||||
#
|
||||
# Changed by commit 743162013d40 ("sched: Remove proliferation
|
||||
# of wait_on_bit() action functions") in v3.17 (2014-07-07)
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/wait.h>
|
||||
void conftest_wait_on_bit_lock(void) {
|
||||
wait_on_bit_lock(NULL, 0, 0);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/wait.h>
|
||||
void conftest_wait_on_bit_lock(void) {
|
||||
wait_on_bit_lock(NULL, 0, NULL, 0);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT 4" | append_conftest "functions"
|
||||
return
|
||||
fi
|
||||
echo "#error wait_on_bit_lock() conftest failed!" | append_conftest "functions"
|
||||
;;
|
||||
|
||||
pci_stop_and_remove_bus_device)
|
||||
#
|
||||
# Determine if the pci_stop_and_remove_bus_device() function is present.
|
||||
@ -2523,31 +2450,6 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
mm_context_t)
|
||||
#
|
||||
# Determine if the 'mm_context_t' data type is present
|
||||
# and if it has an 'id' member.
|
||||
# It does not exist on all architectures.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/mm.h>
|
||||
int conftest_mm_context_t(void) {
|
||||
return offsetof(mm_context_t, id);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
|
||||
rm -f conftest$$.o
|
||||
return
|
||||
else
|
||||
echo "#undef NV_MM_CONTEXT_T_HAS_ID" | append_conftest "types"
|
||||
return
|
||||
fi
|
||||
;;
|
||||
|
||||
pci_dev_has_ats_enabled)
|
||||
#
|
||||
# Determine if the 'pci_dev' data type has a 'ats_enabled' member.
|
||||
@ -5102,6 +5004,42 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_CC_PLATFORM_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
cc_attr_guest_sev_snp)
|
||||
#
|
||||
# Determine if 'CC_ATTR_GUEST_SEV_SNP' is present.
|
||||
#
|
||||
# Added by commit aa5a461171f9 ("x86/mm: Extend cc_attr to
|
||||
# include AMD SEV-SNP") in v5.19.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_CC_PLATFORM_H_PRESENT)
|
||||
#include <linux/cc_platform.h>
|
||||
#endif
|
||||
|
||||
enum cc_attr cc_attributes = CC_ATTR_GUEST_SEV_SNP;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_CC_ATTR_SEV_SNP" "" "types"
|
||||
;;
|
||||
|
||||
hv_get_isolation_type)
|
||||
#
|
||||
# Determine if 'hv_get_isolation_type()' is present.
|
||||
# Added by commit faff44069ff5 ("x86/hyperv: Add Write/Read MSR
|
||||
# registers via ghcb page") in v5.16.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_ASM_MSHYPERV_H_PRESENT)
|
||||
#include <asm/mshyperv.h>
|
||||
#endif
|
||||
void conftest_hv_get_isolation_type(void) {
|
||||
int i;
|
||||
hv_get_isolation_type(i);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_HV_GET_ISOLATION_TYPE" "" "functions"
|
||||
;;
|
||||
|
||||
drm_prime_pages_to_sg_has_drm_device_arg)
|
||||
#
|
||||
# Determine if drm_prime_pages_to_sg() has 'dev' argument.
|
||||
@ -6596,7 +6534,9 @@ compile_test() {
|
||||
# Determine whether drm_fbdev_generic_setup is present.
|
||||
#
|
||||
# Added by commit 9060d7f49376 ("drm/fb-helper: Finish the
|
||||
# generic fbdev emulation") in v4.19.
|
||||
# generic fbdev emulation") in v4.19. Removed by commit
|
||||
# aae4682e5d66 ("drm/fbdev-generic: Convert to fbdev-ttm")
|
||||
# in v6.11.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
@ -6608,6 +6548,48 @@ compile_test() {
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FBDEV_GENERIC_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_fbdev_ttm_setup)
|
||||
#
|
||||
# Determine whether drm_fbdev_ttm_setup is present.
|
||||
#
|
||||
# Added by commit aae4682e5d66 ("drm/fbdev-generic:
|
||||
# Convert to fbdev-ttm") in v6.11.
|
||||
#
|
||||
CODE="
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
#include <drm/drm_fbdev_ttm.h>
|
||||
#endif
|
||||
void conftest_drm_fbdev_ttm_setup(void) {
|
||||
drm_fbdev_ttm_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FBDEV_TTM_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_output_poll_changed)
|
||||
#
|
||||
# Determine whether drm_mode_config_funcs.output_poll_changed
|
||||
# callback is present
|
||||
#
|
||||
# Removed by commit 446d0f4849b1 ("drm: Remove struct
|
||||
# drm_mode_config_funcs.output_poll_changed") in v6.12. Hotplug
|
||||
# event support is handled through the fbdev emulation interface
|
||||
# going forward.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_MODE_CONFIG_H_PRESENT)
|
||||
#include <drm/drm_mode_config.h>
|
||||
#else
|
||||
#include <drm/drm_crtc.h>
|
||||
#endif
|
||||
int conftest_drm_output_poll_changed_available(void) {
|
||||
return offsetof(struct drm_mode_config_funcs, output_poll_changed);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_OUTPUT_POLL_CHANGED_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
drm_aperture_remove_conflicting_pci_framebuffers)
|
||||
@ -6990,6 +6972,192 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_PROPERTY_BLOB_PUT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_driver_has_gem_prime_mmap)
|
||||
#
|
||||
# Determine if the 'drm_driver' structure has a 'gem_prime_mmap'
|
||||
# function pointer.
|
||||
#
|
||||
# Removed by commit 0adec22702d4 ("drm: Remove struct
|
||||
# drm_driver.gem_prime_mmap") in v6.6.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_driver_has_gem_prime_mmap(void) {
|
||||
return offsetof(struct drm_driver, gem_prime_mmap);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP" "" "types"
|
||||
;;
|
||||
|
||||
drm_gem_prime_mmap)
|
||||
#
|
||||
# Determine if the function drm_gem_prime_mmap() is present.
|
||||
#
|
||||
# Added by commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()
|
||||
# in v5.0
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
void conftest_drm_gem_prime_mmap(void) {
|
||||
drm_gem_prime_mmap();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_MMAP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
vmf_insert_mixed)
|
||||
#
|
||||
# Determine if the function vmf_insert_mixed() is present.
|
||||
#
|
||||
# Added by commit 1c8f422059ae ("mm: change return type to
|
||||
# vm_fault_t") in v4.17.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm.h>
|
||||
void conftest_vmf_insert_mixed() {
|
||||
vmf_insert_mixed();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VMF_INSERT_MIXED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pfn_to_pfn_t)
|
||||
#
|
||||
# Determine if the function pfn_to_pfn_t() is present.
|
||||
#
|
||||
# Added by commit 34c0fd540e79 ("mm, dax, pmem: introduce pfn_t") in
|
||||
# v4.5.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_PFN_T_H_PRESENT)
|
||||
#include <linux/pfn_t.h>
|
||||
#endif
|
||||
void conftest_pfn_to_pfn_t() {
|
||||
pfn_to_pfn_t();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PFN_TO_PFN_T_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_gem_dmabuf_mmap)
|
||||
#
|
||||
# Determine if the drm_gem_dmabuf_mmap() function is present.
|
||||
#
|
||||
# drm_gem_dmabuf_mmap() was exported by commit c308279f8798 ("drm:
|
||||
# export gem dmabuf_ops for drivers to reuse") in v4.17.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
void conftest_drm_gem_dmabuf_mmap(void) {
|
||||
drm_gem_dmabuf_mmap();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_DMABUF_MMAP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_gem_prime_export_has_dev_arg)
|
||||
#
|
||||
# Determine if drm_gem_prime_export() function has a 'dev' argument.
|
||||
#
|
||||
# This argument was removed by commit e4fa8457b219 ("drm/prime:
|
||||
# Align gem_prime_export with obj_funcs.export") in v5.4.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_PRIME_H_PRESENT)
|
||||
#include <drm/drm_prime.h>
|
||||
#endif
|
||||
|
||||
void conftest_drm_gem_prime_export_has_dev_arg(
|
||||
struct drm_device *dev,
|
||||
struct drm_gem_object *obj) {
|
||||
(void) drm_gem_prime_export(dev, obj, 0);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_PRIME_EXPORT_HAS_DEV_ARG" "" "types"
|
||||
;;
|
||||
|
||||
dma_buf_ops_has_cache_sgt_mapping)
|
||||
#
|
||||
# Determine if dma_buf_ops structure has a 'cache_sgt_mapping'
|
||||
# member.
|
||||
#
|
||||
# dma_buf_ops::cache_sgt_mapping was added by commit f13e143e7444
|
||||
# ("dma-buf: start caching of sg_table objects v2") in v5.3.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/dma-buf.h>
|
||||
int conftest_dma_ops_has_cache_sgt_mapping(void) {
|
||||
return offsetof(struct dma_buf_ops, cache_sgt_mapping);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DMA_BUF_OPS_HAS_CACHE_SGT_MAPPING" "" "types"
|
||||
;;
|
||||
|
||||
drm_gem_object_funcs)
|
||||
#
|
||||
# Determine if the 'struct drm_gem_object_funcs' type is present.
|
||||
#
|
||||
# Added by commit b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
|
||||
# in v5.0.
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_GEM_H_PRESENT)
|
||||
#include <drm/drm_gem.h>
|
||||
#endif
|
||||
struct drm_gem_object_funcs funcs;"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_FUNCS_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
struct_page_has_zone_device_data)
|
||||
#
|
||||
# Determine if struct page has a 'zone_device_data' field.
|
||||
#
|
||||
# Added by commit 8a164fef9c4c ("mm: simplify ZONE_DEVICE page
|
||||
# private data") in v5.3.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mm_types.h>
|
||||
int conftest_struct_page_has_zone_device_data(void) {
|
||||
return offsetof(struct page, zone_device_data);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA" "" "types"
|
||||
;;
|
||||
|
||||
folio_test_swapcache)
|
||||
#
|
||||
# Determine if the folio_test_swapcache() function is present.
|
||||
#
|
||||
# folio_test_swapcache() was exported by commit d389a4a811551 ("mm:
|
||||
# Add folio flag manipulation functions") in v5.16.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/page-flags.h>
|
||||
void conftest_folio_test_swapcache(void) {
|
||||
folio_test_swapcache();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FOLIO_TEST_SWAPCACHE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit. Please
|
||||
# avoid specifying -rc kernels, and only use SHAs that actually exist
|
||||
|
@ -15,6 +15,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_atomic_uapi.h \
|
||||
drm/drm_drv.h \
|
||||
drm/drm_fbdev_generic.h \
|
||||
drm/drm_fbdev_ttm.h \
|
||||
drm/drm_framebuffer.h \
|
||||
drm/drm_connector.h \
|
||||
drm/drm_probe_helper.h \
|
||||
@ -99,5 +100,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/sync_file.h \
|
||||
linux/cc_platform.h \
|
||||
asm/cpufeature.h \
|
||||
linux/mpi.h
|
||||
linux/mpi.h \
|
||||
asm/mshyperv.h \
|
||||
linux/pfn_t.h
|
||||
|
||||
|
@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
// Ran out of attempts - return thread even if its stack may not be
|
||||
// allocated on the preferred node
|
||||
if ((i == (attempts - 1)))
|
||||
if (i == (attempts - 1))
|
||||
break;
|
||||
|
||||
// Get the NUMA node where the first page of the stack is resident. If
|
||||
|
120
kernel-open/nvidia-drm/nv_common_utils.h
Normal file
120
kernel-open/nvidia-drm/nv_common_utils.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __NV_COMMON_UTILS_H__
|
||||
#define __NV_COMMON_UTILS_H__
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvmisc.h"
|
||||
|
||||
#if !defined(TRUE)
|
||||
#define TRUE NV_TRUE
|
||||
#endif
|
||||
|
||||
#if !defined(FALSE)
|
||||
#define FALSE NV_FALSE
|
||||
#endif
|
||||
|
||||
#define NV_IS_UNSIGNED(x) ((__typeof__(x))-1 > 0)
|
||||
|
||||
/* Get the length of a statically-sized array. */
|
||||
#define ARRAY_LEN(_arr) (sizeof(_arr) / sizeof(_arr[0]))
|
||||
|
||||
#define NV_INVALID_HEAD 0xFFFFFFFF
|
||||
|
||||
#define NV_INVALID_CONNECTOR_PHYSICAL_INFORMATION (~0)
|
||||
|
||||
#if !defined(NV_MIN)
|
||||
# define NV_MIN(a,b) (((a)<(b))?(a):(b))
|
||||
#endif
|
||||
|
||||
#define NV_MIN3(a,b,c) NV_MIN(NV_MIN(a, b), c)
|
||||
#define NV_MIN4(a,b,c,d) NV_MIN3(NV_MIN(a,b),c,d)
|
||||
|
||||
#if !defined(NV_MAX)
|
||||
# define NV_MAX(a,b) (((a)>(b))?(a):(b))
|
||||
#endif
|
||||
|
||||
#define NV_MAX3(a,b,c) NV_MAX(NV_MAX(a, b), c)
|
||||
#define NV_MAX4(a,b,c,d) NV_MAX3(NV_MAX(a,b),c,d)
|
||||
|
||||
static inline int NV_LIMIT_VAL_TO_MIN_MAX(int val, int min, int max)
|
||||
{
|
||||
if (val < min) {
|
||||
return min;
|
||||
}
|
||||
if (val > max) {
|
||||
return max;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
#define NV_ROUNDUP_DIV(x,y) ((x) / (y) + (((x) % (y)) ? 1 : 0))
|
||||
|
||||
/*
|
||||
* Macros used for computing palette entries:
|
||||
*
|
||||
* NV_UNDER_REPLICATE(val, source_size, result_size) expands a value
|
||||
* of source_size bits into a value of target_size bits by shifting
|
||||
* the source value into the high bits and replicating the high bits
|
||||
* of the value into the low bits of the result.
|
||||
*
|
||||
* PALETTE_DEPTH_SHIFT(val, w) maps a colormap entry for a component
|
||||
* that has w bits to an appropriate entry in a LUT of 256 entries.
|
||||
*/
|
||||
static inline unsigned int NV_UNDER_REPLICATE(unsigned short val,
|
||||
int source_size,
|
||||
int result_size)
|
||||
{
|
||||
return (val << (result_size - source_size)) |
|
||||
(val >> ((source_size << 1) - result_size));
|
||||
}
|
||||
|
||||
|
||||
static inline unsigned short PALETTE_DEPTH_SHIFT(unsigned short val, int depth)
|
||||
{
|
||||
return NV_UNDER_REPLICATE(val, depth, 8);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use __builtin_ffs where it is supported, or provide an equivalent
|
||||
* implementation for platforms like riscv where it is not.
|
||||
*/
|
||||
#if defined(__GNUC__) && !NVCPU_IS_RISCV64
|
||||
static inline int nv_ffs(int x)
|
||||
{
|
||||
return __builtin_ffs(x);
|
||||
}
|
||||
#else
|
||||
static inline int nv_ffs(int x)
|
||||
{
|
||||
if (x == 0)
|
||||
return 0;
|
||||
|
||||
LOWESTBITIDX_32(x);
|
||||
|
||||
return 1 + x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __NV_COMMON_UTILS_H__ */
|
@ -85,7 +85,11 @@
|
||||
|
||||
/* For nv_drm_gem_prime_force_fence_signal */
|
||||
#ifndef spin_is_locked
|
||||
#if ((__FreeBSD_version >= 1500000) && (__FreeBSD_version < 1500018)) || (__FreeBSD_version < 1401501)
|
||||
#define spin_is_locked(lock) mtx_owned(lock.m)
|
||||
#else
|
||||
#define spin_is_locked(lock) mtx_owned(lock)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef rwsem_is_locked
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -38,6 +38,13 @@
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
enum nv_drm_transfer_function {
|
||||
NV_DRM_TRANSFER_FUNCTION_DEFAULT,
|
||||
NV_DRM_TRANSFER_FUNCTION_LINEAR,
|
||||
NV_DRM_TRANSFER_FUNCTION_PQ,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX,
|
||||
};
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@ -63,6 +70,8 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct NvKmsLUTCaps olut_caps;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
@ -142,6 +151,12 @@ struct nv_drm_crtc_state {
|
||||
* nv_drm_atomic_crtc_destroy_state().
|
||||
*/
|
||||
struct nv_drm_flip *nv_flip;
|
||||
|
||||
enum nv_drm_transfer_function regamma_tf;
|
||||
struct drm_property_blob *regamma_lut;
|
||||
uint64_t regamma_divisor;
|
||||
struct nv_drm_lut_surface *regamma_drm_lut_surface;
|
||||
NvBool regamma_changed;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *state)
|
||||
@ -149,6 +164,11 @@ static inline struct nv_drm_crtc_state *to_nv_crtc_state(struct drm_crtc_state *
|
||||
return container_of(state, struct nv_drm_crtc_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_crtc_state *to_nv_crtc_state_const(const struct drm_crtc_state *state)
|
||||
{
|
||||
return container_of(state, struct nv_drm_crtc_state, base);
|
||||
}
|
||||
|
||||
struct nv_drm_plane {
|
||||
/**
|
||||
* @base:
|
||||
@ -170,6 +190,9 @@ struct nv_drm_plane {
|
||||
* Index of this plane in the per head array of layers.
|
||||
*/
|
||||
uint32_t layer_idx;
|
||||
|
||||
struct NvKmsLUTCaps ilut_caps;
|
||||
struct NvKmsLUTCaps tmo_caps;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
@ -180,6 +203,22 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
return container_of(plane, struct nv_drm_plane, base);
|
||||
}
|
||||
|
||||
struct nv_drm_lut_surface {
|
||||
struct NvKmsKapiDevice *pDevice;
|
||||
struct NvKmsKapiMemory *nvkms_memory;
|
||||
struct NvKmsKapiSurface *nvkms_surface;
|
||||
struct {
|
||||
NvU32 vssSegments;
|
||||
enum NvKmsLUTVssType vssType;
|
||||
|
||||
NvU32 lutEntries;
|
||||
enum NvKmsLUTFormat entryFormat;
|
||||
|
||||
} properties;
|
||||
void *buffer;
|
||||
struct kref refcount;
|
||||
};
|
||||
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
@ -187,6 +226,20 @@ struct nv_drm_plane_state {
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
struct drm_property_blob *lms_ctm;
|
||||
struct drm_property_blob *lms_to_itp_ctm;
|
||||
struct drm_property_blob *itp_to_lms_ctm;
|
||||
struct drm_property_blob *blend_ctm;
|
||||
|
||||
enum nv_drm_transfer_function degamma_tf;
|
||||
struct drm_property_blob *degamma_lut;
|
||||
uint64_t degamma_multiplier; /* S31.32 Sign-Magnitude Format */
|
||||
struct nv_drm_lut_surface *degamma_drm_lut_surface;
|
||||
NvBool degamma_changed;
|
||||
|
||||
struct drm_property_blob *tmo_lut;
|
||||
struct nv_drm_lut_surface *tmo_drm_lut_surface;
|
||||
NvBool tmo_changed;
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
|
@ -64,12 +64,14 @@
|
||||
#include <drm/drm_ioctl.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
#include <drm/drm_aperture.h>
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
|
||||
#if defined(NV_DRM_DRM_FBDEV_TTM_H_PRESENT)
|
||||
#include <drm/drm_fbdev_ttm.h>
|
||||
#elif defined(NV_DRM_DRM_FBDEV_GENERIC_H_PRESENT)
|
||||
#include <drm/drm_fbdev_generic.h>
|
||||
#endif
|
||||
|
||||
@ -105,16 +107,16 @@ static int nv_drm_revoke_sub_ownership(struct drm_device *dev);
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
|
||||
static char* nv_get_input_colorspace_name(
|
||||
enum NvKmsInputColorSpace colorSpace)
|
||||
{
|
||||
switch (colorSpace) {
|
||||
case NVKMS_INPUT_COLORSPACE_NONE:
|
||||
return "None";
|
||||
case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
|
||||
return "IEC 61966-2-2 linear FP";
|
||||
return "scRGB Linear FP16";
|
||||
case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
|
||||
return "ITU-R BT.2100-PQ YCbCr";
|
||||
return "BT.2100 PQ";
|
||||
default:
|
||||
/* We shoudn't hit this */
|
||||
WARN_ON("Unsupported input colorspace");
|
||||
@ -122,8 +124,30 @@ static const char* nv_get_input_colorspace_name(
|
||||
}
|
||||
};
|
||||
|
||||
static char* nv_get_transfer_function_name(
|
||||
enum nv_drm_transfer_function tf)
|
||||
{
|
||||
switch (tf) {
|
||||
case NV_DRM_TRANSFER_FUNCTION_LINEAR:
|
||||
return "Linear";
|
||||
case NV_DRM_TRANSFER_FUNCTION_PQ:
|
||||
return "PQ (Perceptual Quantizer)";
|
||||
default:
|
||||
/* We shoudn't hit this */
|
||||
WARN_ON("Unsupported transfer function");
|
||||
#if defined(fallthrough)
|
||||
fallthrough;
|
||||
#else
|
||||
/* Fallthrough */
|
||||
#endif
|
||||
case NV_DRM_TRANSFER_FUNCTION_DEFAULT:
|
||||
return "Default";
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
{
|
||||
struct drm_connector *connector = NULL;
|
||||
@ -167,6 +191,7 @@ static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_OUTPUT_POLL_CHANGED_PRESENT */
|
||||
|
||||
static struct drm_framebuffer *nv_drm_framebuffer_create(
|
||||
struct drm_device *dev,
|
||||
@ -204,7 +229,9 @@ static const struct drm_mode_config_funcs nv_mode_config_funcs = {
|
||||
.atomic_check = nv_drm_atomic_check,
|
||||
.atomic_commit = nv_drm_atomic_commit,
|
||||
|
||||
#if defined(NV_DRM_OUTPUT_POLL_CHANGED_PRESENT)
|
||||
.output_poll_changed = nv_drm_output_poll_changed,
|
||||
#endif
|
||||
};
|
||||
|
||||
static void nv_drm_event_callback(const struct NvKmsKapiEvent *event)
|
||||
@ -364,15 +391,21 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
struct drm_prop_enum_list colorspace_enum_list[3] = { };
|
||||
struct drm_prop_enum_list tf_enum_list[NV_DRM_TRANSFER_FUNCTION_MAX] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
colorspace_enum_list[len].type = i;
|
||||
colorspace_enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
for (i = 0; i < NV_DRM_TRANSFER_FUNCTION_MAX; i++) {
|
||||
tf_enum_list[i].type = i;
|
||||
tf_enum_list[i].name = nv_get_transfer_function_name(i);
|
||||
}
|
||||
|
||||
if (nv_dev->supportsSyncpts) {
|
||||
nv_dev->nv_out_fence_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_ATOMIC,
|
||||
@ -384,7 +417,7 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
colorspace_enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
@ -399,6 +432,109 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_plane_lms_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_LMS_CTM", 0);
|
||||
if (nv_dev->nv_plane_lms_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_lms_to_itp_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_LMS_TO_ITP_CTM", 0);
|
||||
if (nv_dev->nv_plane_lms_to_itp_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_itp_to_lms_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_ITP_TO_LMS_CTM", 0);
|
||||
if (nv_dev->nv_plane_itp_to_lms_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nv_dev->nv_plane_blend_ctm_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_BLEND_CTM", 0);
|
||||
if (nv_dev->nv_plane_blend_ctm_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// Degamma TF + LUT + LUT Size + Multiplier
|
||||
|
||||
nv_dev->nv_plane_degamma_tf_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0,
|
||||
"NV_PLANE_DEGAMMA_TF", tf_enum_list,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX);
|
||||
if (nv_dev->nv_plane_degamma_tf_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_DEGAMMA_LUT", 0);
|
||||
if (nv_dev->nv_plane_degamma_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_PLANE_DEGAMMA_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_plane_degamma_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_degamma_multiplier_property =
|
||||
drm_property_create_range(nv_dev->dev, 0,
|
||||
"NV_PLANE_DEGAMMA_MULTIPLIER", 0,
|
||||
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
|
||||
if (nv_dev->nv_plane_degamma_multiplier_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// TMO LUT + LUT Size
|
||||
|
||||
nv_dev->nv_plane_tmo_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_PLANE_TMO_LUT", 0);
|
||||
if (nv_dev->nv_plane_tmo_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_plane_tmo_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_PLANE_TMO_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_plane_tmo_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
// REGAMMA TF + LUT + LUT Size + Divisor
|
||||
|
||||
nv_dev->nv_crtc_regamma_tf_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0,
|
||||
"NV_CRTC_REGAMMA_TF", tf_enum_list,
|
||||
NV_DRM_TRANSFER_FUNCTION_MAX);
|
||||
if (nv_dev->nv_crtc_regamma_tf_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_crtc_regamma_lut_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_CRTC_REGAMMA_LUT", 0);
|
||||
if (nv_dev->nv_crtc_regamma_lut_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
nv_dev->nv_crtc_regamma_lut_size_property =
|
||||
drm_property_create_range(nv_dev->dev, DRM_MODE_PROP_IMMUTABLE,
|
||||
"NV_CRTC_REGAMMA_LUT_SIZE", 0, UINT_MAX);
|
||||
if (nv_dev->nv_crtc_regamma_lut_size_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
// S31.32
|
||||
nv_dev->nv_crtc_regamma_divisor_property =
|
||||
drm_property_create_range(nv_dev->dev, 0,
|
||||
"NV_CRTC_REGAMMA_DIVISOR",
|
||||
(((NvU64) 1) << 32), // No values between 0 and 1
|
||||
U64_MAX & ~(((NvU64) 1) << 63)); // No negative values
|
||||
if (nv_dev->nv_crtc_regamma_divisor_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -476,7 +612,7 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
/*
|
||||
* If fbdev is enabled, take modeset ownership now before other DRM clients
|
||||
* can take master (and thus NVKMS ownership).
|
||||
@ -548,6 +684,13 @@ static int nv_drm_load(struct drm_device *dev, unsigned long flags)
|
||||
|
||||
ret = nv_drm_create_properties(nv_dev);
|
||||
if (ret < 0) {
|
||||
drm_mode_config_cleanup(dev);
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
#endif
|
||||
nvKms->freeDevice(nv_dev->pDevice);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
@ -610,7 +753,7 @@ static void __nv_drm_unload(struct drm_device *dev)
|
||||
|
||||
/* Release modeset ownership if fbdev is enabled */
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_dev->hasFramebufferConsole) {
|
||||
drm_atomic_helper_shutdown(dev);
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
@ -710,36 +853,37 @@ void nv_drm_master_drop(struct drm_device *dev, struct drm_file *file_priv)
|
||||
#endif
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
int err;
|
||||
|
||||
nv_drm_revoke_modeset_permission(dev, file_priv, 0);
|
||||
nv_drm_revoke_sub_ownership(dev);
|
||||
|
||||
/*
|
||||
* After dropping nvkms modeset onwership, it is not guaranteed that
|
||||
* drm and nvkms modeset state will remain in sync. Therefore, disable
|
||||
* all outputs and crtcs before dropping nvkms modeset ownership.
|
||||
*
|
||||
* First disable all active outputs atomically and then disable each crtc one
|
||||
* by one, there is not helper function available to disable all crtcs
|
||||
* atomically.
|
||||
*/
|
||||
|
||||
drm_modeset_lock_all(dev);
|
||||
|
||||
if ((err = nv_drm_atomic_helper_disable_all(
|
||||
dev,
|
||||
dev->mode_config.acquire_ctx)) != 0) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"nv_drm_atomic_helper_disable_all failed with error code %d !",
|
||||
err);
|
||||
}
|
||||
|
||||
drm_modeset_unlock_all(dev);
|
||||
|
||||
if (!nv_dev->hasFramebufferConsole) {
|
||||
int err;
|
||||
|
||||
/*
|
||||
* After dropping nvkms modeset onwership, it is not guaranteed that drm
|
||||
* and nvkms modeset state will remain in sync. Therefore, disable all
|
||||
* outputs and crtcs before dropping nvkms modeset ownership.
|
||||
*
|
||||
* First disable all active outputs atomically and then disable each
|
||||
* crtc one by one, there is not helper function available to disable
|
||||
* all crtcs atomically.
|
||||
*/
|
||||
|
||||
drm_modeset_lock_all(dev);
|
||||
|
||||
if ((err = nv_drm_atomic_helper_disable_all(
|
||||
dev,
|
||||
dev->mode_config.acquire_ctx)) != 0) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"nv_drm_atomic_helper_disable_all failed with error code %d !",
|
||||
err);
|
||||
}
|
||||
|
||||
drm_modeset_unlock_all(dev);
|
||||
|
||||
nvKms->releaseOwnership(nv_dev->pDevice);
|
||||
}
|
||||
}
|
||||
@ -1684,14 +1828,19 @@ static struct drm_driver nv_drm_driver = {
|
||||
.num_ioctls = ARRAY_SIZE(nv_drm_ioctls),
|
||||
|
||||
/*
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers
|
||||
* for fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle().
|
||||
* Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME import/export for
|
||||
* all drivers") made drm_gem_prime_handle_to_fd() /
|
||||
* drm_gem_prime_fd_to_handle() the default when .prime_handle_to_fd /
|
||||
* .prime_fd_to_handle are unspecified, respectively.
|
||||
*
|
||||
* Prior Linux kernel v6.6 commit 6b85aa68d9d5 ("drm: Enable PRIME
|
||||
* import/export for all drivers") made these helpers the default when
|
||||
* .prime_handle_to_fd / .prime_fd_to_handle are unspecified, so it's fine
|
||||
* to just skip specifying them if the helpers aren't present.
|
||||
* Linux kernel v6.6 commit 71a7974ac701 ("drm/prime: Unexport helpers for
|
||||
* fd/handle conversion") unexports drm_gem_prime_handle_to_fd() and
|
||||
* drm_gem_prime_fd_to_handle(). However, because of the aforementioned commit,
|
||||
* it's fine to just skip specifying them in this case.
|
||||
*
|
||||
* Linux kernel v6.7 commit 0514f63cfff3 ("Revert "drm/prime: Unexport helpers
|
||||
* for fd/handle conversion"") exported the helpers again, but left the default
|
||||
* behavior intact. Nonetheless, it does not hurt to specify them.
|
||||
*/
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_drm_gem_prime_handle_to_fd
|
||||
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
|
||||
@ -1703,6 +1852,21 @@ static struct drm_driver nv_drm_driver = {
|
||||
.gem_prime_import = nv_drm_gem_prime_import,
|
||||
.gem_prime_import_sg_table = nv_drm_gem_prime_import_sg_table,
|
||||
|
||||
/*
|
||||
* Linux kernel v5.0 commit 7698799f95 ("drm/prime: Add drm_gem_prime_mmap()")
|
||||
* added drm_gem_prime_mmap().
|
||||
*
|
||||
* Linux kernel v6.6 commit 0adec22702d4 ("drm: Remove struct
|
||||
* drm_driver.gem_prime_mmap") removed .gem_prime_mmap, but replaced it with a
|
||||
* direct call to drm_gem_prime_mmap().
|
||||
*
|
||||
* TODO: Support .gem_prime_mmap on Linux < v5.0 using internal implementation.
|
||||
*/
|
||||
#if defined(NV_DRM_GEM_PRIME_MMAP_PRESENT) && \
|
||||
defined(NV_DRM_DRIVER_HAS_GEM_PRIME_MMAP)
|
||||
.gem_prime_mmap = drm_gem_prime_mmap,
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_CALLBACKS)
|
||||
.gem_prime_export = drm_gem_prime_export,
|
||||
.gem_prime_get_sg_table = nv_drm_gem_prime_get_sg_table,
|
||||
@ -1838,7 +2002,7 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
goto failed_drm_register;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
if (nv_drm_fbdev_module_param &&
|
||||
drm_core_check_feature(dev, DRIVER_MODESET)) {
|
||||
|
||||
@ -1850,10 +2014,15 @@ void nv_drm_register_drm_device(const nv_gpu_info_t *gpu_info)
|
||||
#else
|
||||
drm_aperture_remove_conflicting_pci_framebuffers(pdev, nv_drm_driver.name);
|
||||
#endif
|
||||
nvKms->framebufferConsoleDisabled(nv_dev->pDevice);
|
||||
}
|
||||
#if defined(NV_DRM_FBDEV_TTM_AVAILABLE)
|
||||
drm_fbdev_ttm_setup(dev, 32);
|
||||
#elif defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
drm_fbdev_generic_setup(dev, 32);
|
||||
#endif
|
||||
}
|
||||
#endif /* defined(NV_DRM_FBDEV_GENERIC_AVAILABLE) */
|
||||
#endif /* defined(NV_DRM_FBDEV_AVAILABLE) */
|
||||
|
||||
/* Add NVIDIA-DRM device into list */
|
||||
|
||||
@ -1995,12 +2164,12 @@ void nv_drm_suspend_resume(NvBool suspend)
|
||||
|
||||
if (suspend) {
|
||||
drm_kms_helper_poll_disable(dev);
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 1);
|
||||
#endif
|
||||
drm_mode_config_reset(dev);
|
||||
} else {
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
drm_fb_helper_set_suspend_unlocked(dev->fb_helper, 0);
|
||||
#endif
|
||||
drm_kms_helper_poll_enable(dev);
|
||||
|
@ -36,12 +36,15 @@
|
||||
|
||||
static void __nv_drm_framebuffer_free(struct nv_drm_framebuffer *nv_fb)
|
||||
{
|
||||
struct drm_framebuffer *fb = &nv_fb->base;
|
||||
uint32_t i;
|
||||
|
||||
/* Unreference gem object */
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
nv_drm_gem_object_unreference_unlocked(nv_fb->nv_gem[i]);
|
||||
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
|
||||
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
|
||||
if (gem != NULL) {
|
||||
struct nv_drm_gem_object *nv_gem = to_nv_gem_object(gem);
|
||||
nv_drm_gem_object_unreference_unlocked(nv_gem);
|
||||
}
|
||||
}
|
||||
|
||||
@ -69,10 +72,8 @@ static int
|
||||
nv_drm_framebuffer_create_handle(struct drm_framebuffer *fb,
|
||||
struct drm_file *file, unsigned int *handle)
|
||||
{
|
||||
struct nv_drm_framebuffer *nv_fb = to_nv_framebuffer(fb);
|
||||
|
||||
return nv_drm_gem_handle_create(file,
|
||||
nv_fb->nv_gem[0],
|
||||
to_nv_gem_object(nv_fb_get_gem_obj(fb, 0)),
|
||||
handle);
|
||||
}
|
||||
|
||||
@ -88,6 +89,7 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_framebuffer *nv_fb;
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
const int num_planes = nv_drm_format_num_planes(cmd->pixel_format);
|
||||
uint32_t i;
|
||||
|
||||
@ -101,21 +103,22 @@ static struct nv_drm_framebuffer *nv_drm_framebuffer_alloc(
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
if (num_planes > ARRAY_SIZE(nv_fb->nv_gem)) {
|
||||
if (num_planes > NVKMS_MAX_PLANES_PER_SURFACE) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(nv_dev, "Unsupported number of planes");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_planes; i++) {
|
||||
if ((nv_fb->nv_gem[i] = nv_drm_gem_object_lookup(
|
||||
dev,
|
||||
file,
|
||||
cmd->handles[i])) == NULL) {
|
||||
nv_gem = nv_drm_gem_object_lookup(dev, file, cmd->handles[i]);
|
||||
|
||||
if (nv_gem == NULL) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
"Failed to find gem object of type nvkms memory");
|
||||
goto failed;
|
||||
}
|
||||
|
||||
nv_fb_set_gem_obj(&nv_fb->base, i, &nv_gem->base);
|
||||
}
|
||||
|
||||
return nv_fb;
|
||||
@ -135,12 +138,14 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct NvKmsKapiCreateSurfaceParams params = { };
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct drm_framebuffer *fb = &nv_fb->base;
|
||||
uint32_t i;
|
||||
int ret;
|
||||
|
||||
/* Initialize the base framebuffer object and add it to drm subsystem */
|
||||
|
||||
ret = drm_framebuffer_init(dev, &nv_fb->base, &nv_framebuffer_funcs);
|
||||
ret = drm_framebuffer_init(dev, fb, &nv_framebuffer_funcs);
|
||||
if (ret != 0) {
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
@ -148,23 +153,18 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nv_fb->nv_gem); i++) {
|
||||
if (nv_fb->nv_gem[i] != NULL) {
|
||||
if (!nvKms->isMemoryValidForDisplay(nv_dev->pDevice,
|
||||
nv_fb->nv_gem[i]->pMemory)) {
|
||||
NV_DRM_DEV_LOG_INFO(
|
||||
nv_dev,
|
||||
"Framebuffer memory not appropriate for scanout");
|
||||
goto fail;
|
||||
}
|
||||
for (i = 0; i < NVKMS_MAX_PLANES_PER_SURFACE; i++) {
|
||||
struct drm_gem_object *gem = nv_fb_get_gem_obj(fb, i);
|
||||
if (gem != NULL) {
|
||||
nv_gem = to_nv_gem_object(gem);
|
||||
|
||||
params.planes[i].memory = nv_fb->nv_gem[i]->pMemory;
|
||||
params.planes[i].offset = nv_fb->base.offsets[i];
|
||||
params.planes[i].pitch = nv_fb->base.pitches[i];
|
||||
params.planes[i].memory = nv_gem->pMemory;
|
||||
params.planes[i].offset = fb->offsets[i];
|
||||
params.planes[i].pitch = fb->pitches[i];
|
||||
}
|
||||
}
|
||||
params.height = nv_fb->base.height;
|
||||
params.width = nv_fb->base.width;
|
||||
params.height = fb->height;
|
||||
params.width = fb->width;
|
||||
params.format = format;
|
||||
|
||||
if (have_modifier) {
|
||||
@ -199,7 +199,7 @@ static int nv_drm_framebuffer_init(struct drm_device *dev,
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
drm_framebuffer_cleanup(&nv_fb->base);
|
||||
drm_framebuffer_cleanup(fb);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -41,8 +41,10 @@
|
||||
struct nv_drm_framebuffer {
|
||||
struct NvKmsKapiSurface *pSurface;
|
||||
|
||||
struct nv_drm_gem_object*
|
||||
nv_gem[NVKMS_MAX_PLANES_PER_SURFACE];
|
||||
#if !defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
struct drm_gem_object*
|
||||
obj[NVKMS_MAX_PLANES_PER_SURFACE];
|
||||
#endif
|
||||
|
||||
struct drm_framebuffer base;
|
||||
};
|
||||
@ -56,6 +58,29 @@ static inline struct nv_drm_framebuffer *to_nv_framebuffer(
|
||||
return container_of(fb, struct nv_drm_framebuffer, base);
|
||||
}
|
||||
|
||||
static inline struct drm_gem_object *nv_fb_get_gem_obj(
|
||||
struct drm_framebuffer *fb,
|
||||
uint32_t plane)
|
||||
{
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
return fb->obj[plane];
|
||||
#else
|
||||
return to_nv_framebuffer(fb)->obj[plane];
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_fb_set_gem_obj(
|
||||
struct drm_framebuffer *fb,
|
||||
uint32_t plane,
|
||||
struct drm_gem_object *obj)
|
||||
{
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
fb->obj[plane] = obj;
|
||||
#else
|
||||
to_nv_framebuffer(fb)->obj[plane] = obj;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct drm_framebuffer *nv_drm_internal_framebuffer_create(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *file,
|
||||
|
@ -71,9 +71,20 @@ static void __nv_drm_gem_nvkms_memory_free(struct nv_drm_gem_object *nv_gem)
|
||||
nv_drm_free(nv_nvkms_memory);
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory);
|
||||
|
||||
static int __nv_drm_gem_nvkms_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return drm_gem_mmap_obj(&nv_gem->base,
|
||||
drm_vma_node_size(&nv_gem->base.vma_node) << PAGE_SHIFT, vma);
|
||||
}
|
||||
@ -146,11 +157,18 @@ static struct drm_gem_object *__nv_drm_gem_nvkms_prime_dup(
|
||||
static int __nv_drm_gem_nvkms_map(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nv_drm_device *nv_dev = nv_nvkms_memory->base.nv_dev;
|
||||
struct NvKmsKapiMemory *pMemory = nv_nvkms_memory->base.pMemory;
|
||||
|
||||
mutex_lock(&nv_nvkms_memory->map_lock);
|
||||
|
||||
if (nv_nvkms_memory->physically_mapped) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_dev->hasVideoMemory) {
|
||||
return 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nvKms->mapMemory(nv_dev->pDevice,
|
||||
@ -161,7 +179,8 @@ static int __nv_drm_gem_nvkms_map(
|
||||
nv_dev,
|
||||
"Failed to map NvKmsKapiMemory 0x%p",
|
||||
pMemory);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = ioremap_wc(
|
||||
@ -177,7 +196,9 @@ static int __nv_drm_gem_nvkms_map(
|
||||
|
||||
nv_nvkms_memory->physically_mapped = true;
|
||||
|
||||
return 0;
|
||||
done:
|
||||
mutex_unlock(&nv_nvkms_memory->map_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
@ -186,14 +207,38 @@ static void *__nv_drm_gem_nvkms_prime_vmap(
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
if (nv_nvkms_memory->physically_mapped) {
|
||||
return nv_nvkms_memory->pWriteCombinedIORemapAddress;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this buffer isn't physically mapped, it might be backed by struct
|
||||
* pages. Use vmap in that case.
|
||||
*/
|
||||
if (nv_nvkms_memory->pages_count > 0) {
|
||||
return nv_drm_vmap(nv_nvkms_memory->pages,
|
||||
nv_nvkms_memory->pages_count);
|
||||
}
|
||||
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static void __nv_drm_gem_nvkms_prime_vunmap(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
void *address)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped &&
|
||||
nv_nvkms_memory->pages_count > 0) {
|
||||
nv_drm_vunmap(address);
|
||||
}
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
@ -201,17 +246,7 @@ static int __nv_drm_gem_map_nvkms_memory_offset(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
uint64_t *offset)
|
||||
{
|
||||
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory =
|
||||
to_nv_nvkms_memory(nv_gem);
|
||||
|
||||
if (!nv_nvkms_memory->physically_mapped) {
|
||||
int ret = __nv_drm_gem_nvkms_map(nv_nvkms_memory);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return nv_drm_gem_create_mmap_offset(&nv_nvkms_memory->base, offset);
|
||||
return nv_drm_gem_create_mmap_offset(nv_gem, offset);
|
||||
}
|
||||
|
||||
static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
@ -223,7 +258,7 @@ static struct sg_table *__nv_drm_gem_nvkms_memory_prime_get_sg_table(
|
||||
struct sg_table *sg_table;
|
||||
|
||||
if (nv_nvkms_memory->pages_count == 0) {
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
NV_DRM_DEV_DEBUG_DRIVER(
|
||||
nv_dev,
|
||||
"Cannot create sg_table for NvKmsKapiMemory 0x%p",
|
||||
nv_gem->pMemory);
|
||||
@ -241,6 +276,7 @@ const struct nv_drm_gem_object_funcs nv_gem_nvkms_memory_ops = {
|
||||
.free = __nv_drm_gem_nvkms_memory_free,
|
||||
.prime_dup = __nv_drm_gem_nvkms_prime_dup,
|
||||
.prime_vmap = __nv_drm_gem_nvkms_prime_vmap,
|
||||
.prime_vunmap = __nv_drm_gem_nvkms_prime_vunmap,
|
||||
.mmap = __nv_drm_gem_nvkms_mmap,
|
||||
.handle_vma_fault = __nv_drm_gem_nvkms_handle_vma_fault,
|
||||
.create_mmap_offset = __nv_drm_gem_map_nvkms_memory_offset,
|
||||
@ -265,6 +301,7 @@ static int __nv_drm_nvkms_gem_obj_init(
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_init(&nv_nvkms_memory->map_lock);
|
||||
nv_nvkms_memory->pPhysicalAddress = NULL;
|
||||
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
|
||||
nv_nvkms_memory->physically_mapped = false;
|
||||
|
@ -32,8 +32,15 @@
|
||||
struct nv_drm_gem_nvkms_memory {
|
||||
struct nv_drm_gem_object base;
|
||||
|
||||
/*
|
||||
* Lock to protect concurrent writes to physically_mapped, pPhysicalAddress,
|
||||
* and pWriteCombinedIORemapAddress.
|
||||
*
|
||||
* __nv_drm_gem_nvkms_map(), the sole writer, is structured such that
|
||||
* readers are not required to hold the lock.
|
||||
*/
|
||||
struct mutex map_lock;
|
||||
bool physically_mapped;
|
||||
|
||||
void *pPhysicalAddress;
|
||||
void *pWriteCombinedIORemapAddress;
|
||||
|
||||
|
@ -36,6 +36,10 @@
|
||||
#include "linux/mm.h"
|
||||
#include "nv-mm.h"
|
||||
|
||||
#if defined(NV_LINUX_PFN_T_H_PRESENT)
|
||||
#include "linux/pfn_t.h"
|
||||
#endif
|
||||
|
||||
#if defined(NV_BSD)
|
||||
#include <vm/vm_pageout.h>
|
||||
#endif
|
||||
@ -103,6 +107,37 @@ static int __nv_drm_gem_user_memory_mmap(struct nv_drm_gem_object *nv_gem,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX) && !defined(NV_VMF_INSERT_MIXED_PRESENT)
|
||||
static vm_fault_t __nv_vm_insert_mixed_helper(
|
||||
struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long pfn)
|
||||
{
|
||||
int ret;
|
||||
|
||||
#if defined(NV_PFN_TO_PFN_T_PRESENT)
|
||||
ret = vm_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
|
||||
#else
|
||||
ret = vm_insert_mixed(vma, address, pfn);
|
||||
#endif
|
||||
|
||||
switch (ret) {
|
||||
case 0:
|
||||
case -EBUSY:
|
||||
/*
|
||||
* EBUSY indicates that another thread already handled
|
||||
* the faulted range.
|
||||
*/
|
||||
return VM_FAULT_NOPAGE;
|
||||
case -ENOMEM:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
struct nv_drm_gem_object *nv_gem,
|
||||
struct vm_area_struct *vma,
|
||||
@ -112,36 +147,19 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
unsigned long address = nv_page_fault_va(vmf);
|
||||
struct drm_gem_object *gem = vma->vm_private_data;
|
||||
unsigned long page_offset;
|
||||
vm_fault_t ret;
|
||||
unsigned long pfn;
|
||||
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset >= nv_user_memory->pages_count);
|
||||
pfn = page_to_pfn(nv_user_memory->pages[page_offset]);
|
||||
|
||||
#if !defined(NV_LINUX)
|
||||
ret = vmf_insert_pfn(vma, address, page_to_pfn(nv_user_memory->pages[page_offset]));
|
||||
#else /* !defined(NV_LINUX) */
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
case -EBUSY:
|
||||
/*
|
||||
* EBUSY indicates that another thread already handled
|
||||
* the faulted range.
|
||||
*/
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
break;
|
||||
case -ENOMEM:
|
||||
ret = VM_FAULT_OOM;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "Unhandled error in %s: %d\n", __FUNCTION__, ret);
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
break;
|
||||
}
|
||||
#endif /* !defined(NV_LINUX) */
|
||||
|
||||
return ret;
|
||||
return vmf_insert_pfn(vma, address, pfn);
|
||||
#elif defined(NV_VMF_INSERT_MIXED_PRESENT)
|
||||
return vmf_insert_mixed(vma, address, pfn_to_pfn_t(pfn));
|
||||
#else
|
||||
return __nv_vm_insert_mixed_helper(vma, address, pfn);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __nv_drm_gem_user_create_mmap_offset(
|
||||
|
@ -144,6 +144,12 @@ void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
|
||||
#endif
|
||||
|
||||
drm_gem_private_object_init(dev, &nv_gem->base, size);
|
||||
|
||||
/* Create mmap offset early for drm_gem_prime_mmap(), if possible. */
|
||||
if (nv_gem->ops->create_mmap_offset) {
|
||||
uint64_t offset;
|
||||
nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, &offset);
|
||||
}
|
||||
}
|
||||
|
||||
struct drm_gem_object *nv_drm_gem_prime_import(struct drm_device *dev,
|
||||
@ -232,6 +238,7 @@ int nv_drm_gem_map_offset_ioctl(struct drm_device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* mmap offset creation is idempotent, fetch it by creating it again. */
|
||||
if (nv_gem->ops->create_mmap_offset) {
|
||||
ret = nv_gem->ops->create_mmap_offset(nv_dev, nv_gem, ¶ms->offset);
|
||||
} else {
|
||||
|
@ -40,8 +40,13 @@
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE) || \
|
||||
defined(NV_DRM_COLOR_CTM_3X4_PRESENT) || \
|
||||
defined(NV_DRM_COLOR_LUT_PRESENT)
|
||||
/*
|
||||
* For DRM_MODE_ROTATE_*, DRM_MODE_REFLECT_*, struct drm_color_ctm_3x4, and
|
||||
* struct drm_color_lut.
|
||||
*/
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
@ -358,6 +363,24 @@ static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_property_blob_put(struct drm_property_blob *blob)
|
||||
{
|
||||
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
|
||||
drm_property_blob_put(blob);
|
||||
#else
|
||||
drm_property_unreference_blob(blob);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_property_blob_get(struct drm_property_blob *blob)
|
||||
{
|
||||
#if defined(NV_DRM_PROPERTY_BLOB_PUT_PRESENT)
|
||||
drm_property_blob_get(blob);
|
||||
#else
|
||||
drm_property_reference_blob(blob);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
@ -625,6 +648,31 @@ static inline int nv_drm_format_num_planes(uint32_t format)
|
||||
#define DRM_UNLOCKED 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* struct drm_color_ctm_3x4 was added by commit 6872a189be50 ("drm/amd/display:
|
||||
* Add 3x4 CTM support for plane CTM") in v6.8. For backwards compatibility,
|
||||
* define it when not present.
|
||||
*/
|
||||
#if !defined(NV_DRM_COLOR_CTM_3X4_PRESENT)
|
||||
struct drm_color_ctm_3x4 {
|
||||
__u64 matrix[12];
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* struct drm_color_lut was added by commit 5488dc16fde7 ("drm: introduce pipe
|
||||
* color correction properties") in v4.6. For backwards compatibility, define it
|
||||
* when not present.
|
||||
*/
|
||||
#if !defined(NV_DRM_COLOR_LUT_PRESENT)
|
||||
struct drm_color_lut {
|
||||
__u16 red;
|
||||
__u16 green;
|
||||
__u16 blue;
|
||||
__u16 reserved;
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_vma_offset_exact_lookup_locked() were added
|
||||
* by kernel commit 2225cfe46bcc which was Signed-off-by:
|
||||
|
@ -34,7 +34,7 @@ MODULE_PARM_DESC(
|
||||
"Enable atomic kernel modesetting (1 = enable, 0 = disable (default))");
|
||||
module_param_named(modeset, nv_drm_modeset_module_param, bool, 0400);
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
MODULE_PARM_DESC(
|
||||
fbdev,
|
||||
"Create a framebuffer device (1 = enable, 0 = disable (default)) (EXPERIMENTAL)");
|
||||
|
@ -414,6 +414,31 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT)
|
||||
if (commit) {
|
||||
/*
|
||||
* This function does what is necessary to prepare the framebuffers
|
||||
* attached to each new plane in the state for scan out, mostly by
|
||||
* calling back into driver callbacks the NVIDIA driver does not
|
||||
* provide. The end result is that all it does on the NVIDIA driver
|
||||
* is populate the plane state's dma fence pointers with any implicit
|
||||
* sync fences attached to the GEM objects associated with those planes
|
||||
* in the new state, prefering explicit sync fences when appropriate.
|
||||
* This must be done prior to converting the per-plane fences to
|
||||
* semaphore waits below.
|
||||
*
|
||||
* Note this only works when the drm_framebuffer:obj[] field is present
|
||||
* and populated, so skip calling this function on kernels where that
|
||||
* field is not present.
|
||||
*/
|
||||
ret = drm_atomic_helper_prepare_planes(dev, state);
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
#endif /* defined(NV_DRM_FRAMEBUFFER_OBJ_PRESENT) */
|
||||
|
||||
memset(requested_config, 0, sizeof(*requested_config));
|
||||
|
||||
/* Loop over affected crtcs and construct NvKmsKapiRequestedModeSetConfig */
|
||||
|
@ -59,14 +59,20 @@ typedef struct nv_timer nv_drm_timer;
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_FBDEV_GENERIC_AVAILABLE
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_FBDEV_TTM_SETUP_PRESENT) && defined(NV_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PRESENT)
|
||||
#define NV_DRM_FBDEV_AVAILABLE
|
||||
#define NV_DRM_FBDEV_TTM_AVAILABLE
|
||||
#endif
|
||||
|
||||
struct page;
|
||||
|
||||
/* Set to true when the atomic modeset feature is enabled. */
|
||||
extern bool nv_drm_modeset_module_param;
|
||||
#if defined(NV_DRM_FBDEV_GENERIC_AVAILABLE)
|
||||
#if defined(NV_DRM_FBDEV_AVAILABLE)
|
||||
/* Set to true when the nvidia-drm driver should install a framebuffer device */
|
||||
extern bool nv_drm_fbdev_module_param;
|
||||
#endif
|
||||
|
@ -163,6 +163,24 @@ struct nv_drm_device {
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct drm_property *nv_plane_lms_ctm_property;
|
||||
struct drm_property *nv_plane_lms_to_itp_ctm_property;
|
||||
struct drm_property *nv_plane_itp_to_lms_ctm_property;
|
||||
struct drm_property *nv_plane_blend_ctm_property;
|
||||
|
||||
struct drm_property *nv_plane_degamma_tf_property;
|
||||
struct drm_property *nv_plane_degamma_lut_property;
|
||||
struct drm_property *nv_plane_degamma_lut_size_property;
|
||||
struct drm_property *nv_plane_degamma_multiplier_property;
|
||||
|
||||
struct drm_property *nv_plane_tmo_lut_property;
|
||||
struct drm_property *nv_plane_tmo_lut_size_property;
|
||||
|
||||
struct drm_property *nv_crtc_regamma_tf_property;
|
||||
struct drm_property *nv_crtc_regamma_lut_property;
|
||||
struct drm_property *nv_crtc_regamma_lut_size_property;
|
||||
struct drm_property *nv_crtc_regamma_divisor_property;
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
||||
|
@ -67,10 +67,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += fence_set_error
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += sync_file_get_fence
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffers
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_generic_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_fbdev_ttm_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_attach_hdr_output_metadata_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_helper_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_crtc_enable_color_mgmt
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_legacy_gamma_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_mixed
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pfn_to_pfn_t
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_prime_mmap
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_bus_has_bus_type
|
||||
@ -130,3 +134,9 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_aperture_remove_conflicting_pci_framebuffe
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_create_dp_colorspace_property_has_supported_colorspaces_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_syncobj_features_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_framebuffer_obj_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_ctm_3x4_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_color_lut
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_property_blob_put
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_gem_prime_mmap
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_output_poll_changed
|
||||
|
@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
// Ran out of attempts - return thread even if its stack may not be
|
||||
// allocated on the preferred node
|
||||
if ((i == (attempts - 1)))
|
||||
if (i == (attempts - 1))
|
||||
break;
|
||||
|
||||
// Get the NUMA node where the first page of the stack is resident. If
|
||||
|
@ -86,6 +86,9 @@ module_param_named(vblank_sem_control, vblank_sem_control, bool, 0400);
|
||||
static bool opportunistic_display_sync = true;
|
||||
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
|
||||
|
||||
static enum NvKmsDebugForceColorSpace debug_force_color_space = NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
|
||||
module_param_named(debug_force_color_space, debug_force_color_space, uint, 0400);
|
||||
|
||||
/* These parameters are used for fault injection tests. Normally the defaults
|
||||
* should be used. */
|
||||
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
|
||||
@ -139,6 +142,14 @@ NvBool nvkms_opportunistic_display_sync(void)
|
||||
return opportunistic_display_sync;
|
||||
}
|
||||
|
||||
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void)
|
||||
{
|
||||
if (debug_force_color_space >= NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX) {
|
||||
return NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE;
|
||||
}
|
||||
return debug_force_color_space;
|
||||
}
|
||||
|
||||
NvBool nvkms_kernel_supports_syncpts(void)
|
||||
{
|
||||
/*
|
||||
@ -1084,7 +1095,7 @@ static void nvkms_kapi_event_kthread_q_callback(void *arg)
|
||||
nvKmsKapiHandleEventQueueChange(device);
|
||||
}
|
||||
|
||||
struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
|
||||
static struct nvkms_per_open *nvkms_open_common(enum NvKmsClientType type,
|
||||
struct NvKmsKapiDevice *device,
|
||||
int *status)
|
||||
{
|
||||
@ -1136,7 +1147,7 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
static void nvkms_close_pm_locked(struct nvkms_per_open *popen)
|
||||
{
|
||||
/*
|
||||
* Don't use down_interruptible(): we need to free resources
|
||||
@ -1199,7 +1210,7 @@ static void nvkms_close_popen(struct nvkms_per_open *popen)
|
||||
}
|
||||
}
|
||||
|
||||
int nvkms_ioctl_common
|
||||
static int nvkms_ioctl_common
|
||||
(
|
||||
struct nvkms_per_open *popen,
|
||||
NvU32 cmd, NvU64 address, const size_t size
|
||||
@ -1558,6 +1569,48 @@ NvBool nvKmsKapiGetFunctionsTable
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiGetFunctionsTable);
|
||||
|
||||
NvU32 nvKmsKapiF16ToF32(NvU16 a)
|
||||
{
|
||||
return nvKmsKapiF16ToF32Internal(a);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF16ToF32);
|
||||
|
||||
NvU16 nvKmsKapiF32ToF16(NvU32 a)
|
||||
{
|
||||
return nvKmsKapiF32ToF16Internal(a);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF32ToF16);
|
||||
|
||||
NvU32 nvKmsKapiF32Mul(NvU32 a, NvU32 b)
|
||||
{
|
||||
return nvKmsKapiF32MulInternal(a, b);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF32Mul);
|
||||
|
||||
NvU32 nvKmsKapiF32Div(NvU32 a, NvU32 b)
|
||||
{
|
||||
return nvKmsKapiF32DivInternal(a, b);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF32Div);
|
||||
|
||||
NvU32 nvKmsKapiF32Add(NvU32 a, NvU32 b)
|
||||
{
|
||||
return nvKmsKapiF32AddInternal(a, b);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF32Add);
|
||||
|
||||
NvU32 nvKmsKapiF32ToUI32RMinMag(NvU32 a, NvBool exact)
|
||||
{
|
||||
return nvKmsKapiF32ToUI32RMinMagInternal(a, exact);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiF32ToUI32RMinMag);
|
||||
|
||||
NvU32 nvKmsKapiUI32ToF32(NvU32 a)
|
||||
{
|
||||
return nvKmsKapiUI32ToF32Internal(a);
|
||||
}
|
||||
EXPORT_SYMBOL(nvKmsKapiUI32ToF32);
|
||||
|
||||
/*************************************************************************
|
||||
* File operation callback functions.
|
||||
*************************************************************************/
|
||||
|
@ -67,6 +67,14 @@ enum NvKmsSyncPtOp {
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
};
|
||||
|
||||
enum NvKmsDebugForceColorSpace {
|
||||
NVKMS_DEBUG_FORCE_COLOR_SPACE_NONE,
|
||||
NVKMS_DEBUG_FORCE_COLOR_SPACE_RGB,
|
||||
NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV444,
|
||||
NVKMS_DEBUG_FORCE_COLOR_SPACE_YUV422,
|
||||
NVKMS_DEBUG_FORCE_COLOR_SPACE_MAX,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
||||
struct {
|
||||
@ -102,6 +110,7 @@ NvBool nvkms_disable_vrr_memclk_switch(void);
|
||||
NvBool nvkms_hdmi_deepcolor(void);
|
||||
NvBool nvkms_vblank_sem_control(void);
|
||||
NvBool nvkms_opportunistic_display_sync(void);
|
||||
enum NvKmsDebugForceColorSpace nvkms_debug_force_color_space(void);
|
||||
|
||||
void nvkms_call_rm (void *ops);
|
||||
void* nvkms_alloc (size_t size,
|
||||
|
@ -110,4 +110,18 @@ NvBool nvKmsSetBacklight(NvU32 display_id, void *drv_priv, NvU32 brightness);
|
||||
|
||||
NvBool nvKmsOpenDevHasSubOwnerPermissionOrBetter(const struct NvKmsPerOpenDev *pOpenDev);
|
||||
|
||||
NvU32 nvKmsKapiF16ToF32Internal(NvU16 a);
|
||||
|
||||
NvU16 nvKmsKapiF32ToF16Internal(NvU32 a);
|
||||
|
||||
NvU32 nvKmsKapiF32MulInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32DivInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32AddInternal(NvU32 a, NvU32 b);
|
||||
|
||||
NvU32 nvKmsKapiF32ToUI32RMinMagInternal(NvU32 a, NvBool exact);
|
||||
|
||||
NvU32 nvKmsKapiUI32ToF32Internal(NvU32 a);
|
||||
|
||||
#endif /* __NV_KMS_H__ */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
Copyright (c) 2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
Copyright (c) 2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2023 NVIDIA Corporation
|
||||
Copyright (c) 2013-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -462,6 +462,7 @@
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC3 0x00000044 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_SCC_NB3 0x00000045 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD1 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD2 0x00000047 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD3 0x00000048 /* */
|
||||
#define NV_PFAULT_CLIENT_HUB_GSPLITE1 0x00000049 /* */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -81,7 +81,7 @@
|
||||
#define NUM_Q_ITEMS_IN_MULTITHREAD_TEST (NUM_TEST_Q_ITEMS * NUM_TEST_KTHREADS)
|
||||
|
||||
// This exists in order to have a function to place a breakpoint on:
|
||||
void on_nvq_assert(void)
|
||||
static void on_nvq_assert(void)
|
||||
{
|
||||
(void)NULL;
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
// Ran out of attempts - return thread even if its stack may not be
|
||||
// allocated on the preferred node
|
||||
if ((i == (attempts - 1)))
|
||||
if (i == (attempts - 1))
|
||||
break;
|
||||
|
||||
// Get the NUMA node where the first page of the stack is resident. If
|
||||
|
@ -1,7 +1,6 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_conf_computing.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_sec2_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_maxwell_sec2.c
|
||||
@ -12,7 +11,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_blackwell_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nvstatus.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nvCpuUuid.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/nv-kthread-q.c
|
||||
@ -36,6 +34,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rb_tree.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_range_device_p2p.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_va_block.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_group.c
|
||||
@ -101,6 +100,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_prefetch.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_ibm.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_faults.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ats_sva.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
|
@ -13,19 +13,6 @@ NVIDIA_UVM_OBJECTS =
|
||||
include $(src)/nvidia-uvm/nvidia-uvm-sources.Kbuild
|
||||
NVIDIA_UVM_OBJECTS += $(patsubst %.c,%.o,$(NVIDIA_UVM_SOURCES))
|
||||
|
||||
# Some linux kernel functions rely on being built with optimizations on and
|
||||
# to work around this we put wrappers for them in a separate file that's built
|
||||
# with optimizations on in debug builds and skipped in other builds.
|
||||
# Notably gcc 4.4 supports per function optimization attributes that would be
|
||||
# easier to use, but is too recent to rely on for now.
|
||||
NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE := nvidia-uvm/uvm_debug_optimized.c
|
||||
NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT := $(patsubst %.c,%.o,$(NVIDIA_UVM_DEBUG_OPTIMIZED_SOURCE))
|
||||
|
||||
ifneq ($(UVM_BUILD_TYPE),debug)
|
||||
# Only build the wrappers on debug builds
|
||||
NVIDIA_UVM_OBJECTS := $(filter-out $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_OBJECTS))
|
||||
endif
|
||||
|
||||
obj-m += nvidia-uvm.o
|
||||
nvidia-uvm-y := $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
@ -36,15 +23,14 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
NVIDIA_UVM_CFLAGS += -O2
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -g
|
||||
endif
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
# work:
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_DEVELOP
|
||||
endif
|
||||
|
||||
NVIDIA_UVM_CFLAGS += -DNVIDIA_UVM_ENABLED
|
||||
@ -56,30 +42,17 @@ NVIDIA_UVM_CFLAGS += -I$(src)/nvidia-uvm
|
||||
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_OBJECTS), $(NVIDIA_UVM_CFLAGS))
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
# Force optimizations on for the wrappers
|
||||
$(call ASSIGN_PER_OBJ_CFLAGS, $(NVIDIA_UVM_DEBUG_OPTIMIZED_OBJECT), $(NVIDIA_UVM_CFLAGS) -O2)
|
||||
endif
|
||||
|
||||
#
|
||||
# Register the conftests needed by nvidia-uvm.ko
|
||||
#
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pci_bus_address
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
@ -88,26 +61,13 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vm_fault_to_errno
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += find_next_bit_wrap
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_is_dma_domain
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += folio_test_swapcache
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
@ -115,6 +75,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += fault_flag_remote_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += struct_page_has_zone_device_data
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -127,9 +127,9 @@ static NV_STATUS uvm_api_mm_initialize(UVM_MM_INITIALIZE_PARAMS *params, struct
|
||||
goto err;
|
||||
}
|
||||
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type != UVM_FD_UNINITIALIZED) {
|
||||
status = NV_ERR_IN_USE;
|
||||
@ -222,10 +222,6 @@ static int uvm_open(struct inode *inode, struct file *filp)
|
||||
// assigning f_mapping.
|
||||
mapping->a_ops = inode->i_mapping->a_ops;
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
|
||||
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
#endif
|
||||
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = mapping;
|
||||
|
||||
@ -325,21 +321,21 @@ static int uvm_release_entry(struct inode *inode, struct file *filp)
|
||||
|
||||
static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie)
|
||||
{
|
||||
uvm_va_range_t *va_range, *va_range_next;
|
||||
uvm_va_range_managed_t *managed_range, *managed_range_next;
|
||||
NvU64 size = 0;
|
||||
|
||||
uvm_assert_rwsem_locked_write(&uvm_va_space_get(vma->vm_file)->lock);
|
||||
uvm_for_each_va_range_in_vma_safe(va_range, va_range_next, vma) {
|
||||
uvm_for_each_va_range_managed_in_vma_safe(managed_range, managed_range_next, vma) {
|
||||
// On exit_mmap (process teardown), current->mm is cleared so
|
||||
// uvm_va_range_vma_current would return NULL.
|
||||
UVM_ASSERT(uvm_va_range_vma(va_range) == vma);
|
||||
UVM_ASSERT(va_range->node.start >= vma->vm_start);
|
||||
UVM_ASSERT(va_range->node.end < vma->vm_end);
|
||||
size += uvm_va_range_size(va_range);
|
||||
UVM_ASSERT(uvm_va_range_vma(managed_range) == vma);
|
||||
UVM_ASSERT(managed_range->va_range.node.start >= vma->vm_start);
|
||||
UVM_ASSERT(managed_range->va_range.node.end < vma->vm_end);
|
||||
size += uvm_va_range_size(&managed_range->va_range);
|
||||
if (make_zombie)
|
||||
uvm_va_range_zombify(va_range);
|
||||
uvm_va_range_zombify(managed_range);
|
||||
else
|
||||
uvm_va_range_destroy(va_range, NULL);
|
||||
uvm_va_range_destroy(&managed_range->va_range, NULL);
|
||||
}
|
||||
|
||||
if (vma->vm_private_data) {
|
||||
@ -351,18 +347,17 @@ static void uvm_destroy_vma_managed(struct vm_area_struct *vma, bool make_zombie
|
||||
|
||||
static void uvm_destroy_vma_semaphore_pool(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
|
||||
va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
va_range = uvm_va_range_find(va_space, vma->vm_start);
|
||||
UVM_ASSERT(va_range &&
|
||||
va_range->node.start == vma->vm_start &&
|
||||
va_range->node.end + 1 == vma->vm_end &&
|
||||
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
|
||||
UVM_ASSERT(semaphore_pool_range &&
|
||||
semaphore_pool_range->va_range.node.start == vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end);
|
||||
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
}
|
||||
|
||||
// If a fault handler is not set, paths like handle_pte_fault in older kernels
|
||||
@ -478,7 +473,7 @@ static void uvm_vm_open_failure(struct vm_area_struct *original,
|
||||
static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
struct vm_area_struct *original;
|
||||
NV_STATUS status;
|
||||
NvU64 new_end;
|
||||
@ -534,13 +529,13 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
goto out;
|
||||
}
|
||||
|
||||
// There can be multiple va_ranges under the vma already. Check if one spans
|
||||
// There can be multiple ranges under the vma already. Check if one spans
|
||||
// the new split boundary. If so, split it.
|
||||
va_range = uvm_va_range_find(va_space, new_end);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
|
||||
if (va_range->node.end != new_end) {
|
||||
status = uvm_va_range_split(va_range, new_end, NULL);
|
||||
managed_range = uvm_va_range_managed_find(va_space, new_end);
|
||||
UVM_ASSERT(managed_range);
|
||||
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
|
||||
if (managed_range->va_range.node.end != new_end) {
|
||||
status = uvm_va_range_split(managed_range, new_end, NULL);
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("Failed to split VA range, destroying both: %s. "
|
||||
"original vma [0x%lx, 0x%lx) new vma [0x%lx, 0x%lx)\n",
|
||||
@ -552,10 +547,10 @@ static void uvm_vm_open_managed(struct vm_area_struct *vma)
|
||||
}
|
||||
}
|
||||
|
||||
// Point va_ranges to the new vma
|
||||
uvm_for_each_va_range_in_vma(va_range, vma) {
|
||||
UVM_ASSERT(uvm_va_range_vma_current(va_range) == original);
|
||||
va_range->managed.vma_wrapper = vma->vm_private_data;
|
||||
// Point managed_ranges to the new vma
|
||||
uvm_for_each_va_range_managed_in_vma(managed_range, vma) {
|
||||
UVM_ASSERT(uvm_va_range_vma_current(managed_range) == original);
|
||||
managed_range->vma_wrapper = vma->vm_private_data;
|
||||
}
|
||||
|
||||
out:
|
||||
@ -657,12 +652,12 @@ static struct vm_operations_struct uvm_vm_ops_managed =
|
||||
};
|
||||
|
||||
// vm operations on semaphore pool allocations only control CPU mappings. Unmapping GPUs,
|
||||
// freeing the allocation, and destroying the va_range are handled by UVM_FREE.
|
||||
// freeing the allocation, and destroying the range are handled by UVM_FREE.
|
||||
static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
|
||||
NV_STATUS status;
|
||||
|
||||
@ -670,14 +665,17 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL &&
|
||||
va_range->node.start == origin_vma->vm_start &&
|
||||
va_range->node.end + 1 == origin_vma->vm_end,
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(semaphore_pool_range);
|
||||
UVM_ASSERT_MSG(semaphore_pool_range &&
|
||||
semaphore_pool_range->va_range.node.start == origin_vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == origin_vma->vm_end,
|
||||
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
|
||||
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
|
||||
va_range->node.end + 1, va_range->type);
|
||||
(NvU64)origin_vma->vm_start,
|
||||
(NvU64)origin_vma->vm_end,
|
||||
semaphore_pool_range->va_range.node.start,
|
||||
semaphore_pool_range->va_range.node.end + 1,
|
||||
semaphore_pool_range->va_range.type);
|
||||
|
||||
// Semaphore pool vmas do not have vma wrappers, but some functions will
|
||||
// assume vm_private_data is a wrapper.
|
||||
@ -689,9 +687,9 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
|
||||
// uvm_disable_vma unmaps in the parent as well; clear the uvm_mem CPU
|
||||
// user mapping metadata and then remap.
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
|
||||
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, origin_vma);
|
||||
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, origin_vma);
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT("Failed to remap semaphore pool to CPU for parent after fork; status = %d (%s)",
|
||||
status, nvstatusToString(status));
|
||||
@ -702,7 +700,7 @@ static void uvm_vm_open_semaphore_pool(struct vm_area_struct *vma)
|
||||
origin_vma->vm_private_data = NULL;
|
||||
origin_vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
uvm_mem_unmap_cpu_user(va_range->semaphore_pool.mem);
|
||||
uvm_mem_unmap_cpu_user(semaphore_pool_range->mem);
|
||||
}
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
@ -751,10 +749,81 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
#endif
|
||||
};
|
||||
|
||||
static void uvm_vm_open_device_p2p(struct vm_area_struct *vma)
|
||||
{
|
||||
struct vm_area_struct *origin_vma = (struct vm_area_struct *)vma->vm_private_data;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(origin_vma->vm_file);
|
||||
uvm_va_range_t *va_range;
|
||||
bool is_fork = (vma->vm_mm != origin_vma->vm_mm);
|
||||
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
va_range = uvm_va_range_find(va_space, origin_vma->vm_start);
|
||||
UVM_ASSERT(va_range);
|
||||
UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_DEVICE_P2P &&
|
||||
va_range->node.start == origin_vma->vm_start &&
|
||||
va_range->node.end + 1 == origin_vma->vm_end,
|
||||
"origin vma [0x%llx, 0x%llx); va_range [0x%llx, 0x%llx) type %d\n",
|
||||
(NvU64)origin_vma->vm_start, (NvU64)origin_vma->vm_end, va_range->node.start,
|
||||
va_range->node.end + 1, va_range->type);
|
||||
|
||||
// Device P2P vmas do not have vma wrappers, but some functions will
|
||||
// assume vm_private_data is a wrapper.
|
||||
vma->vm_private_data = NULL;
|
||||
|
||||
if (is_fork) {
|
||||
// If we forked, leave the parent vma alone.
|
||||
uvm_disable_vma(vma);
|
||||
|
||||
// uvm_disable_vma unmaps in the parent as well so remap the parent
|
||||
uvm_va_range_device_p2p_map_cpu(va_range->va_space, origin_vma, uvm_va_range_to_device_p2p(va_range));
|
||||
}
|
||||
else {
|
||||
// mremap will free the backing pages via unmap so we can't support it.
|
||||
origin_vma->vm_private_data = NULL;
|
||||
origin_vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
vma->vm_ops = &uvm_vm_ops_disabled;
|
||||
unmap_mapping_range(va_space->mapping, va_range->node.start, va_range->node.end - va_range->node.start + 1, 1);
|
||||
}
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
uvm_record_unlock_mmap_lock_write(current->mm);
|
||||
}
|
||||
|
||||
static void uvm_vm_open_device_p2p_entry(struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_VOID(uvm_vm_open_device_p2p(vma));
|
||||
}
|
||||
|
||||
// Device P2P pages are only mapped on the CPU. Pages are allocated externally
|
||||
// to UVM but destroying the range must unpin the RM object.
|
||||
static void uvm_vm_close_device_p2p(struct vm_area_struct *vma)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_vm_close_device_p2p_entry(struct vm_area_struct *vma)
|
||||
{
|
||||
UVM_ENTRY_VOID(uvm_vm_close_device_p2p(vma));
|
||||
}
|
||||
|
||||
static struct vm_operations_struct uvm_vm_ops_device_p2p =
|
||||
{
|
||||
.open = uvm_vm_open_device_p2p_entry,
|
||||
.close = uvm_vm_close_device_p2p_entry,
|
||||
|
||||
#if defined(NV_VM_OPS_FAULT_REMOVED_VMA_ARG)
|
||||
.fault = uvm_vm_fault_sigbus_wrapper_entry,
|
||||
#else
|
||||
.fault = uvm_vm_fault_sigbus_entry,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
bool vma_wrapper_allocated = false;
|
||||
@ -845,18 +914,28 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
status = uvm_va_range_create_mmap(va_space, current->mm, vma->vm_private_data, NULL);
|
||||
|
||||
if (status == NV_ERR_UVM_ADDRESS_IN_USE) {
|
||||
uvm_va_range_semaphore_pool_t *semaphore_pool_range;
|
||||
uvm_va_range_device_p2p_t *device_p2p_range;
|
||||
// If the mmap is for a semaphore pool, the VA range will have been
|
||||
// allocated by a previous ioctl, and the mmap just creates the CPU
|
||||
// mapping.
|
||||
va_range = uvm_va_range_find(va_space, vma->vm_start);
|
||||
if (va_range && va_range->node.start == vma->vm_start &&
|
||||
va_range->node.end + 1 == vma->vm_end &&
|
||||
va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) {
|
||||
semaphore_pool_range = uvm_va_range_semaphore_pool_find(va_space, vma->vm_start);
|
||||
device_p2p_range = uvm_va_range_device_p2p_find(va_space, vma->vm_start);
|
||||
if (semaphore_pool_range && semaphore_pool_range->va_range.node.start == vma->vm_start &&
|
||||
semaphore_pool_range->va_range.node.end + 1 == vma->vm_end) {
|
||||
uvm_vma_wrapper_destroy(vma->vm_private_data);
|
||||
vma_wrapper_allocated = false;
|
||||
vma->vm_private_data = vma;
|
||||
vma->vm_ops = &uvm_vm_ops_semaphore_pool;
|
||||
status = uvm_mem_map_cpu_user(va_range->semaphore_pool.mem, va_range->va_space, vma);
|
||||
status = uvm_mem_map_cpu_user(semaphore_pool_range->mem, semaphore_pool_range->va_range.va_space, vma);
|
||||
}
|
||||
else if (device_p2p_range && device_p2p_range->va_range.node.start == vma->vm_start &&
|
||||
device_p2p_range->va_range.node.end + 1 == vma->vm_end) {
|
||||
uvm_vma_wrapper_destroy(vma->vm_private_data);
|
||||
vma_wrapper_allocated = false;
|
||||
vma->vm_private_data = vma;
|
||||
vma->vm_ops = &uvm_vm_ops_device_p2p;
|
||||
status = uvm_va_range_device_p2p_map_cpu(va_space, vma, device_p2p_range);
|
||||
}
|
||||
}
|
||||
|
||||
@ -914,8 +993,9 @@ static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *
|
||||
// attempt to be made. This is safe because other threads will have only had
|
||||
// a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
|
||||
// case.
|
||||
old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
|
||||
old_fd_type = atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
|
||||
UVM_FD_UNINITIALIZED,
|
||||
UVM_FD_INITIALIZING);
|
||||
old_fd_type &= UVM_FD_TYPE_MASK;
|
||||
if (old_fd_type == UVM_FD_UNINITIALIZED) {
|
||||
status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
|
||||
@ -1001,6 +1081,9 @@ static long uvm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAN_UP_ZOMBIE_RESOURCES, uvm_api_clean_up_zombie_resources);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_POPULATE_PAGEABLE, uvm_api_populate_pageable);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_VALIDATE_VA_RANGE, uvm_api_validate_va_range);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_V2,uvm_api_tools_get_processor_uuid_table_v2);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_ALLOC_DEVICE_P2P, uvm_api_alloc_device_p2p);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_CLEAR_ALL_ACCESS_COUNTERS, uvm_api_clear_all_access_counters);
|
||||
}
|
||||
|
||||
// Try the test ioctls if none of the above matched
|
||||
|
@ -45,20 +45,20 @@
|
||||
// #endif
|
||||
// 3) Do the same thing for the function definition, and for any structs that
|
||||
// are taken as arguments to these functions.
|
||||
// 4) Let this change propagate over to cuda_a and dev_a, so that the CUDA and
|
||||
// nvidia-cfg libraries can start using the new API by bumping up the API
|
||||
// 4) Let this change propagate over to cuda_a and bugfix_main, so that the CUDA
|
||||
// and nvidia-cfg libraries can start using the new API by bumping up the API
|
||||
// version number it's using.
|
||||
// Places where UVM_API_REVISION is defined are:
|
||||
// drivers/gpgpu/cuda/cuda.nvmk (cuda_a)
|
||||
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (dev_a)
|
||||
// 5) Once the dev_a and cuda_a changes have made it back into chips_a,
|
||||
// drivers/setup/linux/nvidia-cfg/makefile.nvmk (bugfix_main)
|
||||
// 5) Once the bugfix_main and cuda_a changes have made it back into chips_a,
|
||||
// remove the old API declaration, definition, and any old structs that were
|
||||
// in use.
|
||||
|
||||
#ifndef _UVM_H_
|
||||
#define _UVM_H_
|
||||
|
||||
#define UVM_API_LATEST_REVISION 12
|
||||
#define UVM_API_LATEST_REVISION 13
|
||||
|
||||
#if !defined(UVM_API_REVISION)
|
||||
#error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
|
||||
@ -384,36 +384,8 @@ NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid
|
||||
// because it is not very informative.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
|
||||
#else
|
||||
NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(8)
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmRegisterGpuSmc
|
||||
//
|
||||
// The same as UvmRegisterGpu, but takes additional parameters to specify the
|
||||
// GPU partition being registered if SMC is enabled.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the physical GPU of the SMC partition to register.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// User handles identifying the partition to register.
|
||||
//
|
||||
// Error codes (see UvmRegisterGpu also):
|
||||
//
|
||||
// NV_ERR_INVALID_STATE:
|
||||
// SMC was not enabled, or the partition identified by the user
|
||||
// handles or its configuration changed.
|
||||
//
|
||||
NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
|
||||
const UvmGpuPlatformParams *platformParams);
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmUnregisterGpu
|
||||
@ -1364,6 +1336,86 @@ NV_STATUS UvmAllocSemaphorePool(void *base,
|
||||
const UvmGpuMappingAttributes *perGpuAttribs,
|
||||
NvLength gpuAttribsCount);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmAllocDeviceP2P
|
||||
//
|
||||
// Create a VA range within the process's address space reserved for use by
|
||||
// other devices to directly access GPU memory. The memory associated with the
|
||||
// RM handle is mapped into the user address space associated with the range for
|
||||
// direct access from the CPU.
|
||||
//
|
||||
// The VA range must not overlap with an existing VA range, irrespective of
|
||||
// whether the existing range corresponds to a UVM allocation or an external
|
||||
// allocation.
|
||||
//
|
||||
// Multiple VA ranges may be created mapping the same physical memory associated
|
||||
// with the RM handle. The associated GPU memory will not be freed until all VA
|
||||
// ranges have been destroyed either explicitly or implicitly and all non-UVM
|
||||
// users (eg. third party device drivers) have stopped using the associated
|
||||
// GPU memory.
|
||||
//
|
||||
// The VA range can be unmapped and freed by calling UvmFree.
|
||||
//
|
||||
// Destroying the final range mapping the RM handle may block until all third
|
||||
// party device drivers and other kernel users have stopped using the memory.
|
||||
//
|
||||
// These VA ranges are only associated with a single GPU.
|
||||
//
|
||||
// Arguments:
|
||||
// gpuUuid: (INPUT)
|
||||
// UUID of the physical GPU if the GPU is not SMC capable or SMC
|
||||
// enabled, or the GPU instance UUID of the partition containing the
|
||||
// memory to be mapped on the CPU.
|
||||
//
|
||||
// base: (INPUT)
|
||||
// Base address of the virtual address range.
|
||||
//
|
||||
// length: (INPUT)
|
||||
// Length, in bytes, of the range.
|
||||
//
|
||||
// offset: (INPUT)
|
||||
// Offset, in bytes, from the start of the externally allocated memory
|
||||
// to map from.
|
||||
//
|
||||
// platformParams: (INPUT)
|
||||
// Platform specific parameters that identify the allocation.
|
||||
// On Linux: RM ctrl fd, hClient and the handle (hMemory) of the
|
||||
// externally allocated memory to map.
|
||||
//
|
||||
// Errors:
|
||||
//
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// base is NULL or length is zero or at least one of base and length is
|
||||
// not aligned to 4K.
|
||||
//
|
||||
// NV_ERR_INVALID_DEVICE:
|
||||
// The gpuUuid was either not registered or has no GPU VA space
|
||||
// registered for it.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// base + offset + length exceeeds the end of the externally allocated
|
||||
// memory handle or the externally allocated handle is not valid.
|
||||
//
|
||||
// NV_ERR_UVM_ADDRESS_IN_USE:
|
||||
// The requested virtual address range overlaps with an existing
|
||||
// allocation.
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The device peer-to-peer feature is not supported by the current
|
||||
// system configuration. This may be because the GPU doesn't support
|
||||
// the peer-to-peer feature or the kernel was not built with the correct
|
||||
// configuration options.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmAllocDeviceP2P(NvProcessorUuid gpuUuid,
|
||||
void *base,
|
||||
NvLength length,
|
||||
NvLength offset,
|
||||
const UvmDeviceP2PPlatformParams *platformParams);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmMigrate
|
||||
//
|
||||
@ -3276,7 +3328,7 @@ NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
|
||||
UvmEventQueueHandle queueHandle,
|
||||
UvmEventEntry_V1 *pBuffer,
|
||||
UvmEventEntry *pBuffer,
|
||||
NvU64 *nEntries);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -3472,32 +3524,21 @@ NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
|
||||
// 4. Destroy event Queue using UvmToolsDestroyEventQueue
|
||||
//
|
||||
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
// This is deprecated and replaced by sizeof(UvmToolsEventControlData).
|
||||
NvLength UvmToolsGetEventControlSize(void);
|
||||
|
||||
// This is deprecated and replaced by sizeof(UvmEventEntry_V1) or
|
||||
// sizeof(UvmEventEntry_V2).
|
||||
NvLength UvmToolsGetEventEntrySize(void);
|
||||
#endif
|
||||
|
||||
NvLength UvmToolsGetNumberOfCounters(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateEventQueue
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of events.
|
||||
// All events are disabled by default. Event queue data persists lifetime of the
|
||||
// target process.
|
||||
// This function is deprecated. See UvmToolsCreateEventQueue_V2.
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of
|
||||
// UvmEventEntry events. All events are disabled by default. Event queue data
|
||||
// persists lifetime of the target process.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for events or counters.
|
||||
// See UvmToolsEventQueueVersion.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
@ -3520,8 +3561,55 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
// NV_ERR_INSUFFICIENT_RESOURCES:
|
||||
// There could be multiple reasons for this error. One would be that
|
||||
// it's not possible to allocate a queue of requested size. Another
|
||||
// would be either event_buffer or event_control memory couldn't be
|
||||
// pinned (e.g. because of OS limitation of pinnable memory). Also it
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsCreateEventQueue_V2
|
||||
//
|
||||
// This call creates an event queue that can hold the given number of
|
||||
// UvmEventEntry_V2 events. All events are disabled by default. Event queue data
|
||||
// persists beyond the lifetime of the target process.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// event_buffer: (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold at least event_buffer_size events. Gets pinned until queue is
|
||||
// destroyed.
|
||||
//
|
||||
// event_buffer_size: (INPUT)
|
||||
// Size of the event queue buffer in units of UvmEventEntry_V2's. Must
|
||||
// be a power of two, and greater than 1.
|
||||
//
|
||||
// event_control (INPUT)
|
||||
// User allocated buffer. Must be page-aligned. Must be large enough to
|
||||
// hold UvmToolsEventControlData (although single page-size allocation
|
||||
// should be more than enough). Gets pinned until queue is destroyed.
|
||||
//
|
||||
// queue: (OUTPUT)
|
||||
// Handle to the created queue.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INSUFFICIENT_PERMISSIONS:
|
||||
// Session handle does not refer to a valid session
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// One of the parameters: event_buffer, event_buffer_size, event_control
|
||||
// is not valid
|
||||
//
|
||||
@ -3538,20 +3626,11 @@ NvLength UvmToolsGetNumberOfCounters(void);
|
||||
// could not have been possible to create UvmToolsEventQueueDescriptor.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(10)
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#else
|
||||
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
#endif
|
||||
NV_STATUS UvmToolsCreateEventQueue_V2(UvmToolsSessionHandle session,
|
||||
void *event_buffer,
|
||||
NvLength event_buffer_size,
|
||||
void *event_control,
|
||||
UvmToolsEventQueueHandle *queue);
|
||||
|
||||
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
|
||||
|
||||
@ -3967,6 +4046,8 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsGetProcessorUuidTable
|
||||
//
|
||||
// This function is deprecated. See UvmToolsGetProcessorUuidTable_V2.
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
@ -3979,55 +4060,63 @@ NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// version: (INPUT)
|
||||
// Requested version for the UUID table returned. The version must
|
||||
// match the requested version of the event queue created with
|
||||
// UvmToolsCreateEventQueue(). See UvmToolsEventQueueVersion.
|
||||
// If the version of the event queue does not match the version of the
|
||||
// UUID table, the behavior is undefined.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The number of elements in the array must be greater
|
||||
// or equal to UVM_MAX_PROCESSORS_V1 if the version is
|
||||
// UvmToolsEventQueueVersion_V1 and UVM_MAX_PROCESSORS if the version is
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// or equal to UVM_MAX_PROCESSORS_V1.
|
||||
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
|
||||
// index this array. Unused indices will have a UUID of zero.
|
||||
// If version is UvmToolsEventQueueVersion_V1 then the reported UUID
|
||||
// will be that of the corresponding physical GPU, even if multiple SMC
|
||||
// partitions are registered under that physical GPU. If version is
|
||||
// UvmToolsEventQueueVersion_V2 then the reported UUID will be the GPU
|
||||
// instance UUID if SMC is enabled, otherwise it will be the UUID of
|
||||
// the physical GPU.
|
||||
// The reported UUID will be that of the corresponding physical GPU,
|
||||
// even if multiple SMC partitions are registered under that physical
|
||||
// GPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
//
|
||||
// NV_ERR_INVALID_ARGUMENT:
|
||||
// The version is not UvmToolsEventQueueVersion_V1 or
|
||||
// UvmToolsEventQueueVersion_V2.
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
NvProcessorUuid *table);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsGetProcessorUuidTable_V2
|
||||
//
|
||||
// Populate a table with the UUIDs of all the currently registered processors
|
||||
// in the target process. When a GPU is registered, it is added to the table.
|
||||
// When a GPU is unregistered, it is removed. As long as a GPU remains
|
||||
// registered, its index in the table does not change.
|
||||
// Note that the index in the table corresponds to the processor ID reported
|
||||
// in UvmEventEntry event records and that the table is not contiguously packed
|
||||
// with non-zero UUIDs even with no GPU unregistrations.
|
||||
//
|
||||
// Arguments:
|
||||
// session: (INPUT)
|
||||
// Handle to the tools session.
|
||||
//
|
||||
// table: (OUTPUT)
|
||||
// Array of processor UUIDs, including the CPU's UUID which is always
|
||||
// at index zero. The number of elements in the array must be greater
|
||||
// or equal to UVM_MAX_PROCESSORS.
|
||||
// The srcIndex and dstIndex fields of the UvmEventMigrationInfo struct
|
||||
// index this array. Unused indices will have a UUID of zero.
|
||||
// The reported UUID will be the GPU instance UUID if SMC is enabled,
|
||||
// otherwise it will be the UUID of the physical GPU.
|
||||
//
|
||||
// Error codes:
|
||||
// NV_ERR_INVALID_ADDRESS:
|
||||
// writing to table failed.
|
||||
//
|
||||
// NV_ERR_NOT_SUPPORTED:
|
||||
// The kernel is not able to support the requested version
|
||||
// (i.e., the UVM kernel driver is older and doesn't support
|
||||
// UvmToolsEventQueueVersion_V2).
|
||||
// The UVM kernel driver is older and doesn't support
|
||||
// UvmToolsGetProcessorUuidTable_V2.
|
||||
//
|
||||
// NV_ERR_NO_MEMORY:
|
||||
// Internal memory allocation failed.
|
||||
//------------------------------------------------------------------------------
|
||||
#if UVM_API_REV_IS_AT_MOST(11)
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table,
|
||||
NvLength table_size,
|
||||
NvLength *count);
|
||||
#else
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
|
||||
UvmToolsEventQueueVersion version,
|
||||
NvProcessorUuid *table);
|
||||
#endif
|
||||
NV_STATUS UvmToolsGetProcessorUuidTable_V2(UvmToolsSessionHandle session,
|
||||
NvProcessorUuid *table);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// UvmToolsFlushEvents
|
||||
|
@ -47,7 +47,7 @@
|
||||
{ \
|
||||
params_type params; \
|
||||
BUILD_BUG_ON(sizeof(params) > UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
|
||||
if (nv_copy_from_user(¶ms, (void __user*)arg, sizeof(params))) \
|
||||
if (copy_from_user(¶ms, (void __user*)arg, sizeof(params))) \
|
||||
return -EFAULT; \
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
@ -60,7 +60,7 @@
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
\
|
||||
if (nv_copy_to_user((void __user*)arg, ¶ms, sizeof(params))) \
|
||||
if (copy_to_user((void __user*)arg, ¶ms, sizeof(params))) \
|
||||
return -EFAULT; \
|
||||
\
|
||||
return 0; \
|
||||
@ -84,7 +84,7 @@
|
||||
if (!params) \
|
||||
return -ENOMEM; \
|
||||
BUILD_BUG_ON(sizeof(*params) <= UVM_MAX_IOCTL_PARAM_STACK_SIZE); \
|
||||
if (nv_copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
|
||||
if (copy_from_user(params, (void __user*)arg, sizeof(*params))) { \
|
||||
uvm_kvfree(params); \
|
||||
return -EFAULT; \
|
||||
} \
|
||||
@ -99,7 +99,7 @@
|
||||
params->rmStatus = function_name(params, filp); \
|
||||
} \
|
||||
\
|
||||
if (nv_copy_to_user((void __user*)arg, params, sizeof(*params))) \
|
||||
if (copy_to_user((void __user*)arg, params, sizeof(*params))) \
|
||||
ret = -EFAULT; \
|
||||
\
|
||||
uvm_kvfree(params); \
|
||||
@ -244,6 +244,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_enable_system_wide_atomics(UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_disable_system_wide_atomics(UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_init_event_tracker_v2(UVM_TOOLS_INIT_EVENT_TRACKER_V2_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_set_notification_threshold(UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_event_queue_enable_events(UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_tools_event_queue_disable_events(UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS *params, struct file *filp);
|
||||
@ -256,5 +257,7 @@ NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file
|
||||
NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_populate_pageable(const UVM_POPULATE_PAGEABLE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_alloc_device_p2p(UVM_ALLOC_DEVICE_P2P_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
|
||||
|
||||
#endif // __UVM_API_H__
|
||||
|
@ -90,18 +90,19 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.dst_id = ats_context->residency_id,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.populate_on_migrate_vma_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
@ -112,7 +113,7 @@ static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// set skip_mapped to true. For pages already mapped, this will only handle
|
||||
// PTE upgrades if needed.
|
||||
status = uvm_migrate_pageable(&uvm_migrate_args);
|
||||
if (status == NV_WARN_NOTHING_TO_DO)
|
||||
if (fault_service_type && (status == NV_WARN_NOTHING_TO_DO))
|
||||
status = NV_OK;
|
||||
|
||||
UVM_ASSERT(status != NV_ERR_MORE_PROCESSING_REQUIRED);
|
||||
@ -379,14 +380,20 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *accessed_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
|
||||
accessed_mask = &ats_context->faults.accessed_mask;
|
||||
else
|
||||
accessed_mask = &ats_context->access_counters.accessed_mask;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return;
|
||||
|
||||
@ -406,7 +413,7 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
@ -420,6 +427,11 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS)
|
||||
accessed_mask = &ats_context->faults.accessed_mask;
|
||||
else
|
||||
accessed_mask = &ats_context->access_counters.accessed_mask;
|
||||
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
@ -432,12 +444,12 @@ static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
}
|
||||
else {
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, service_type, ats_context, max_prefetch_region);
|
||||
}
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
@ -459,10 +471,10 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
|
||||
|
||||
@ -637,6 +649,8 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->access_counters.migrated_mask);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
@ -650,21 +664,24 @@ NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
uvm_page_mask_andnot(&ats_context->access_counters.accessed_mask,
|
||||
&ats_context->access_counters.accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->access_counters.accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
|
||||
uvm_page_mask_t *migrated_mask = &ats_context->access_counters.migrated_mask;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
if (status == NV_OK)
|
||||
uvm_page_mask_region_fill(migrated_mask, subregion);
|
||||
else if (status != NV_WARN_NOTHING_TO_DO)
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -29,18 +29,18 @@
|
||||
|
||||
// Service ATS faults in the range (base, base + UVM_VA_BLOCK_SIZE) with service
|
||||
// type for individual pages in the range requested by page masks set in
|
||||
// ats_context->read_fault_mask/write_fault_mask. base must be aligned to
|
||||
// ats_context->fault.read_fault_mask/write_fault_mask. base must be aligned to
|
||||
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that faulting
|
||||
// addresses fall completely within the VMA. The caller is also responsible for
|
||||
// ensuring that the faulting addresses don't overlap a GMMU region. (See
|
||||
// uvm_ats_check_in_gmmu_region). The caller is also responsible for handling
|
||||
// any errors returned by this function (fault cancellations etc.).
|
||||
//
|
||||
// Returns the fault service status in ats_context->faults_serviced_mask. In
|
||||
// addition, ats_context->reads_serviced_mask returns whether read servicing
|
||||
// worked on write faults iff the read service was also requested in the
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// Returns the fault service status in ats_context->fault.faults_serviced_mask.
|
||||
// In addition, ats_context->fault.reads_serviced_mask returns whether read
|
||||
// servicing worked on write faults iff the read service was also requested in
|
||||
// the corresponding bit in read_fault_mask. These returned masks are only valid
|
||||
// if the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
@ -52,9 +52,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
// Service access counter notifications on ATS regions in the range (base, base
|
||||
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
|
||||
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
|
||||
// The caller is responsible for ensuring that the addresses in the
|
||||
// accessed_mask is completely covered by the VMA. The caller is also
|
||||
// set in ats_context->access_counters.accessed_mask. base must be aligned to
|
||||
// UVM_VA_BLOCK_SIZE. The caller is responsible for ensuring that the addresses
|
||||
// in the accessed_mask is completely covered by the VMA. The caller is also
|
||||
// responsible for handling any errors returned by this function.
|
||||
//
|
||||
// Returns NV_OK if servicing was successful. Any other error indicates an error
|
||||
|
@ -127,12 +127,12 @@ static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
|
||||
// We always use VCMDQ127 for the WAR
|
||||
#define VCMDQ 127
|
||||
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
|
||||
static void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
|
||||
{
|
||||
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
|
||||
}
|
||||
|
||||
NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
static NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
|
||||
{
|
||||
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
|
||||
}
|
||||
|
@ -855,6 +855,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
NvU32 key_version,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
@ -869,6 +870,7 @@ static NV_STATUS cpu_decrypt_in_order(uvm_channel_t *channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
key_version,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
@ -879,6 +881,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
uvm_mem_t *dst_mem,
|
||||
uvm_mem_t *src_mem,
|
||||
const UvmCslIv *decrypt_iv,
|
||||
NvU32 key_version,
|
||||
uvm_mem_t *auth_tag_mem,
|
||||
size_t size,
|
||||
NvU32 copy_size)
|
||||
@ -896,6 +899,7 @@ static NV_STATUS cpu_decrypt_out_of_order(uvm_channel_t *channel,
|
||||
dst_plain + i * copy_size,
|
||||
src_cipher + i * copy_size,
|
||||
decrypt_iv + i,
|
||||
key_version,
|
||||
copy_size,
|
||||
auth_tag_buffer + i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE));
|
||||
}
|
||||
@ -959,7 +963,7 @@ static void gpu_encrypt(uvm_push_t *push,
|
||||
i * UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
|
||||
dst_cipher);
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(push->channel, decrypt_iv);
|
||||
uvm_conf_computing_log_gpu_encryption(push->channel, copy_size, decrypt_iv);
|
||||
|
||||
if (i > 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
@ -1020,6 +1024,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
|
||||
UvmCslIv *decrypt_iv = NULL;
|
||||
UvmCslIv *encrypt_iv = NULL;
|
||||
NvU32 key_version;
|
||||
uvm_tracker_t tracker;
|
||||
size_t src_plain_size;
|
||||
|
||||
@ -1089,6 +1094,11 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
|
||||
gpu_encrypt(&push, dst_cipher, dst_plain_gpu, auth_tag_mem, decrypt_iv, size, copy_size);
|
||||
|
||||
// There shouldn't be any key rotation between the end of the push and the
|
||||
// CPU decryption(s), but it is more robust against test changes to force
|
||||
// decryption to use the saved key.
|
||||
key_version = uvm_channel_pool_key_version(push.channel->pool);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
|
||||
|
||||
TEST_CHECK_GOTO(!mem_match(src_plain, src_cipher, size), out);
|
||||
@ -1101,6 +1111,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
key_version,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
@ -1111,6 +1122,7 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu,
|
||||
dst_plain,
|
||||
dst_cipher,
|
||||
decrypt_iv,
|
||||
key_version,
|
||||
auth_tag_mem,
|
||||
size,
|
||||
copy_size),
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -228,21 +228,65 @@ typedef struct
|
||||
// variant is required when the thread holding the pool lock must sleep
|
||||
// (ex: acquire another mutex) deeper in the call stack, either in UVM or
|
||||
// RM.
|
||||
union {
|
||||
union
|
||||
{
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in its internal
|
||||
// operation and each push may modify this state. push_locks is protected by
|
||||
// the channel pool lock.
|
||||
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
struct
|
||||
{
|
||||
// Secure operations require that uvm_push_begin order matches
|
||||
// uvm_push_end order, because the engine's state is used in its
|
||||
// internal operation and each push may modify this state.
|
||||
// push_locks is protected by the channel pool lock.
|
||||
DECLARE_BITMAP(push_locks, UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
// Counting semaphore for available and unlocked channels, it must be
|
||||
// acquired before submitting work to a channel when the Confidential
|
||||
// Computing feature is enabled.
|
||||
uvm_semaphore_t push_sem;
|
||||
// Counting semaphore for available and unlocked channels, it must be
|
||||
// acquired before submitting work to a channel when the Confidential
|
||||
// Computing feature is enabled.
|
||||
uvm_semaphore_t push_sem;
|
||||
|
||||
// Per channel buffers in unprotected sysmem.
|
||||
uvm_rm_mem_t *pool_sysmem;
|
||||
|
||||
// Per channel buffers in protected vidmem.
|
||||
uvm_rm_mem_t *pool_vidmem;
|
||||
|
||||
struct
|
||||
{
|
||||
// Current encryption key version, incremented upon key rotation.
|
||||
// While there are separate keys for encryption and decryption, the
|
||||
// two keys are rotated at once, so the versioning applies to both.
|
||||
NvU32 version;
|
||||
|
||||
// Lock used to ensure mutual exclusion during key rotation.
|
||||
uvm_mutex_t mutex;
|
||||
|
||||
// CSL contexts passed to RM for key rotation. This is usually an
|
||||
// array containing the CSL contexts associated with the channels in
|
||||
// the pool. In the case of the WLC pool, the array also includes
|
||||
// CSL contexts associated with LCIC channels.
|
||||
UvmCslContext **csl_contexts;
|
||||
|
||||
// Number of elements in the CSL context array.
|
||||
unsigned num_csl_contexts;
|
||||
|
||||
// Number of bytes encrypted, or decrypted, on the engine associated
|
||||
// with the pool since the last key rotation. Only used during
|
||||
// testing, to force key rotations after a certain encryption size,
|
||||
// see UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD.
|
||||
//
|
||||
// Encryptions on a LCIC pool are accounted for in the paired WLC
|
||||
// pool.
|
||||
//
|
||||
// TODO: Bug 4612912: these accounting variables can be removed once
|
||||
// RM exposes an API to set the key rotation lower threshold.
|
||||
atomic64_t encrypted;
|
||||
atomic64_t decrypted;
|
||||
} key_rotation;
|
||||
|
||||
} conf_computing;
|
||||
} uvm_channel_pool_t;
|
||||
|
||||
struct uvm_channel_struct
|
||||
@ -322,43 +366,14 @@ struct uvm_channel_struct
|
||||
// work launches to match the order of push end-s that triggered them.
|
||||
volatile NvU32 gpu_put;
|
||||
|
||||
// Static pushbuffer for channels with static schedule (WLC/LCIC)
|
||||
uvm_rm_mem_t *static_pb_protected_vidmem;
|
||||
|
||||
// Static pushbuffer staging buffer for WLC
|
||||
uvm_rm_mem_t *static_pb_unprotected_sysmem;
|
||||
void *static_pb_unprotected_sysmem_cpu;
|
||||
void *static_pb_unprotected_sysmem_auth_tag_cpu;
|
||||
|
||||
// The above static locations are required by the WLC (and LCIC)
|
||||
// schedule. Protected sysmem location completes WLC's independence
|
||||
// from the pushbuffer allocator.
|
||||
// Protected sysmem location makes WLC independent from the pushbuffer
|
||||
// allocator. Unprotected sysmem and protected vidmem counterparts
|
||||
// are allocated from the channel pool (sysmem, vidmem).
|
||||
void *static_pb_protected_sysmem;
|
||||
|
||||
// Static tracking semaphore notifier values
|
||||
// Because of LCIC's fixed schedule, the secure semaphore release
|
||||
// mechanism uses two additional static locations for incrementing the
|
||||
// notifier values. See:
|
||||
// . channel_semaphore_secure_release()
|
||||
// . setup_lcic_schedule()
|
||||
// . internal_channel_submit_work_wlc()
|
||||
uvm_rm_mem_t *static_notifier_unprotected_sysmem;
|
||||
NvU32 *static_notifier_entry_unprotected_sysmem_cpu;
|
||||
NvU32 *static_notifier_exit_unprotected_sysmem_cpu;
|
||||
uvm_gpu_address_t static_notifier_entry_unprotected_sysmem_gpu_va;
|
||||
uvm_gpu_address_t static_notifier_exit_unprotected_sysmem_gpu_va;
|
||||
|
||||
// Explicit location for push launch tag used by WLC.
|
||||
// Encryption auth tags have to be located in unprotected sysmem.
|
||||
void *launch_auth_tag_cpu;
|
||||
NvU64 launch_auth_tag_gpu_va;
|
||||
|
||||
// Used to decrypt the push back to protected sysmem.
|
||||
// This happens when profilers register callbacks for migration data.
|
||||
uvm_push_crypto_bundle_t *push_crypto_bundles;
|
||||
|
||||
// Accompanying authentication tags for the crypto bundles
|
||||
uvm_rm_mem_t *push_crypto_bundle_auth_tags;
|
||||
} conf_computing;
|
||||
|
||||
// RM channel information
|
||||
@ -451,6 +466,16 @@ struct uvm_channel_manager_struct
|
||||
UVM_BUFFER_LOCATION gpput_loc;
|
||||
UVM_BUFFER_LOCATION pushbuffer_loc;
|
||||
} conf;
|
||||
|
||||
struct
|
||||
{
|
||||
// Flag indicating that the WLC/LCIC mechanism is ready/setup; should
|
||||
// only be false during (de)initialization.
|
||||
bool wlc_ready;
|
||||
|
||||
// True indicates that key rotation is enabled (UVM-wise).
|
||||
bool key_rotation_enabled;
|
||||
} conf_computing;
|
||||
};
|
||||
|
||||
// Create a channel manager for the GPU
|
||||
@ -501,6 +526,12 @@ uvm_channel_t *uvm_channel_lcic_get_paired_wlc(uvm_channel_t *lcic_channel);
|
||||
|
||||
uvm_channel_t *uvm_channel_wlc_get_paired_lcic(uvm_channel_t *wlc_channel);
|
||||
|
||||
NvU64 uvm_channel_get_static_pb_protected_vidmem_gpu_va(uvm_channel_t *channel);
|
||||
|
||||
NvU64 uvm_channel_get_static_pb_unprotected_sysmem_gpu_va(uvm_channel_t *channel);
|
||||
|
||||
char* uvm_channel_get_static_pb_unprotected_sysmem_cpu(uvm_channel_t *channel);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(uvm_pool_type_is_valid(pool->pool_type));
|
||||
@ -532,6 +563,17 @@ static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
|
||||
return UVM_CHANNEL_TYPE_MEMOPS;
|
||||
}
|
||||
|
||||
// Force key rotation in the engine associated with the given channel pool.
|
||||
// Rotation may still not happen if RM cannot acquire the necessary locks (in
|
||||
// which case the function returns NV_ERR_STATE_IN_USE).
|
||||
//
|
||||
// This function should be only invoked in pools in which key rotation is
|
||||
// enabled.
|
||||
NV_STATUS uvm_channel_pool_rotate_key(uvm_channel_pool_t *pool);
|
||||
|
||||
// Retrieve the current encryption key version associated with the channel pool.
|
||||
NvU32 uvm_channel_pool_key_version(uvm_channel_pool_t *pool);
|
||||
|
||||
// Privileged channels support all the Host and engine methods, while
|
||||
// non-privileged channels don't support privileged methods.
|
||||
//
|
||||
@ -579,12 +621,9 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
// beginning.
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);
|
||||
|
||||
// Check if WLC/LCIC mechanism is ready/setup
|
||||
// Should only return false during initialization
|
||||
static bool uvm_channel_manager_is_wlc_ready(uvm_channel_manager_t *manager)
|
||||
{
|
||||
return (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_WLC] != NULL) &&
|
||||
(manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_LCIC] != NULL);
|
||||
return manager->conf_computing.wlc_ready;
|
||||
}
|
||||
// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
|
||||
// associated with access_channel.
|
||||
|
@ -796,11 +796,8 @@ done:
|
||||
static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_push_t *pushes;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
uvm_push_t *pushes = NULL;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
@ -810,9 +807,19 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_type_t channel_type;
|
||||
|
||||
// Key rotation is disabled because this test relies on nested pushes,
|
||||
// which is illegal. If any push other than the first one triggers key
|
||||
// rotation, the test won't complete. This is because key rotation
|
||||
// depends on waiting for ongoing pushes to end, which doesn't happen
|
||||
// if those pushes are ended after the current one begins.
|
||||
uvm_conf_computing_disable_key_rotation(gpu);
|
||||
|
||||
for (channel_type = 0; channel_type < UVM_CHANNEL_TYPE_COUNT; channel_type++) {
|
||||
pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
|
||||
TEST_CHECK_RET(pool != NULL);
|
||||
NvU32 i;
|
||||
NvU32 num_pushes;
|
||||
uvm_channel_pool_t *pool = gpu->channel_manager->pool_to_use.default_for_type[channel_type];
|
||||
|
||||
TEST_CHECK_GOTO(pool != NULL, error);
|
||||
|
||||
// Skip LCIC channels as those can't accept any pushes
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
@ -824,7 +831,7 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
num_pushes = min(pool->num_channels, (NvU32)UVM_PUSH_MAX_CONCURRENT_PUSHES);
|
||||
|
||||
pushes = uvm_kvmalloc_zero(sizeof(*pushes) * num_pushes);
|
||||
TEST_CHECK_RET(pushes != NULL);
|
||||
TEST_CHECK_GOTO(pushes != NULL, error);
|
||||
|
||||
for (i = 0; i < num_pushes; i++) {
|
||||
uvm_push_t *push = &pushes[i];
|
||||
@ -841,12 +848,18 @@ static NV_STATUS test_conf_computing_channel_selection(uvm_va_space_t *va_space)
|
||||
|
||||
uvm_kvfree(pushes);
|
||||
}
|
||||
|
||||
uvm_conf_computing_enable_key_rotation(gpu);
|
||||
}
|
||||
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
|
||||
return status;
|
||||
|
||||
error:
|
||||
if (gpu != NULL)
|
||||
uvm_conf_computing_enable_key_rotation(gpu);
|
||||
|
||||
uvm_thread_context_lock_enable_tracking();
|
||||
uvm_kvfree(pushes);
|
||||
|
||||
@ -948,6 +961,318 @@ release:
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS force_key_rotations(uvm_channel_pool_t *pool, unsigned num_rotations)
|
||||
{
|
||||
unsigned num_tries;
|
||||
unsigned max_num_tries = 20;
|
||||
unsigned num_rotations_completed = 0;
|
||||
|
||||
if (num_rotations == 0)
|
||||
return NV_OK;
|
||||
|
||||
// The number of accepted rotations is kept low, so failed rotation
|
||||
// invocations due to RM not acquiring the necessary locks (which imply a
|
||||
// sleep in the test) do not balloon the test execution time.
|
||||
UVM_ASSERT(num_rotations <= 10);
|
||||
|
||||
for (num_tries = 0; (num_tries < max_num_tries) && (num_rotations_completed < num_rotations); num_tries++) {
|
||||
// Force key rotation, irrespective of encryption usage.
|
||||
NV_STATUS status = uvm_channel_pool_rotate_key(pool);
|
||||
|
||||
// Key rotation may not be able to complete due to RM failing to acquire
|
||||
// the necessary locks. Detect the situation, sleep for a bit, and then
|
||||
// try again
|
||||
//
|
||||
// The maximum time spent sleeping in a single rotation call is
|
||||
// (max_num_tries * max_sleep_us)
|
||||
if (status == NV_ERR_STATE_IN_USE) {
|
||||
NvU32 min_sleep_us = 1000;
|
||||
NvU32 max_sleep_us = 10000;
|
||||
|
||||
usleep_range(min_sleep_us, max_sleep_us);
|
||||
continue;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_RET(status);
|
||||
|
||||
num_rotations_completed++;
|
||||
}
|
||||
|
||||
// If not a single key rotation occurred, the dependent tests still pass,
|
||||
// but there is no much value to them. Instead, return an error so the
|
||||
// maximum number of tries, or the maximum sleep time, are adjusted to
|
||||
// ensure that at least one rotation completes.
|
||||
if (num_rotations_completed > 0)
|
||||
return NV_OK;
|
||||
else
|
||||
return NV_ERR_STATE_IN_USE;
|
||||
}
|
||||
|
||||
static NV_STATUS force_key_rotation(uvm_channel_pool_t *pool)
|
||||
{
|
||||
return force_key_rotations(pool, 1);
|
||||
}
|
||||
|
||||
// Test key rotation in all pools. This is useful because key rotation may not
|
||||
// happen otherwise on certain engines during UVM test execution. For example,
|
||||
// if the MEMOPS channel type is mapped to a CE not shared with any other
|
||||
// channel type, then the only encryption taking place in the engine is due to
|
||||
// semaphore releases (4 bytes each). This small encryption size makes it
|
||||
// unlikely to exceed even small rotation thresholds.
|
||||
static NV_STATUS test_channel_key_rotation_basic(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
uvm_for_each_pool(pool, gpu->channel_manager) {
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
continue;
|
||||
|
||||
TEST_NV_CHECK_RET(force_key_rotation(pool));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Interleave GPU encryptions and decryptions, and their CPU counterparts, with
|
||||
// key rotations.
|
||||
static NV_STATUS test_channel_key_rotation_interleave(uvm_gpu_t *gpu)
|
||||
{
|
||||
int i;
|
||||
uvm_channel_pool_t *gpu_to_cpu_pool;
|
||||
uvm_channel_pool_t *cpu_to_gpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
void *initial_plain_cpu = NULL;
|
||||
void *final_plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
|
||||
|
||||
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
|
||||
|
||||
initial_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (initial_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
final_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (final_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
memset(initial_plain_cpu, 1, size);
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
|
||||
plain_gpu_address,
|
||||
initial_plain_cpu,
|
||||
size,
|
||||
NULL,
|
||||
"CPU > GPU"),
|
||||
out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(gpu_to_cpu_pool), out);
|
||||
TEST_NV_CHECK_GOTO(force_key_rotation(cpu_to_gpu_pool), out);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_gpu_to_cpu(gpu,
|
||||
final_plain_cpu,
|
||||
plain_gpu_address,
|
||||
size,
|
||||
NULL,
|
||||
"GPU > CPU"),
|
||||
out);
|
||||
|
||||
TEST_CHECK_GOTO(!memcmp(initial_plain_cpu, final_plain_cpu, size), out);
|
||||
|
||||
memset(final_plain_cpu, 0, size);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(final_plain_cpu);
|
||||
uvm_kvfree(initial_plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS memset_vidmem(uvm_mem_t *mem, NvU8 val)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_address_t gpu_address;
|
||||
uvm_gpu_t *gpu = mem->backing_gpu;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_vidmem(mem));
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, &push, "zero vidmem"));
|
||||
|
||||
gpu_address = uvm_mem_gpu_address_virtual_kernel(mem, gpu);
|
||||
gpu->parent->ce_hal->memset_1(&push, gpu_address, val, mem->size);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Custom version of uvm_conf_computing_util_memcopy_gpu_to_cpu that allows
|
||||
// testing to insert key rotations in between the push end, and the CPU
|
||||
// decryption
|
||||
static NV_STATUS encrypted_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
unsigned num_rotations_to_insert)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
|
||||
void *src_cipher, *auth_tag;
|
||||
uvm_channel_t *channel;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "Small GPU > CPU encryption");
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
channel = push.channel;
|
||||
uvm_conf_computing_log_gpu_encryption(channel, size, dma_buffer->decrypt_iv);
|
||||
dma_buffer->key_version[0] = uvm_channel_pool_key_version(channel->pool);
|
||||
|
||||
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
TEST_NV_CHECK_GOTO(force_key_rotations(channel->pool, num_rotations_to_insert), out);
|
||||
|
||||
// If num_rotations_to_insert is not zero, the current encryption key will
|
||||
// be different from the one used during CE encryption.
|
||||
|
||||
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
dst_plain,
|
||||
src_cipher,
|
||||
dma_buffer->decrypt_iv,
|
||||
dma_buffer->key_version[0],
|
||||
size,
|
||||
auth_tag);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_channel_key_rotation_cpu_decryption(uvm_gpu_t *gpu,
|
||||
unsigned num_repetitions,
|
||||
unsigned num_rotations_to_insert)
|
||||
{
|
||||
unsigned i;
|
||||
uvm_channel_pool_t *gpu_to_cpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
NvU8 *plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
return NV_OK;
|
||||
|
||||
gpu_to_cpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_CPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(gpu_to_cpu_pool));
|
||||
|
||||
plain_cpu = (NvU8 *) uvm_kvmalloc_zero(size);
|
||||
if (plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
TEST_NV_CHECK_GOTO(memset_vidmem(plain_gpu, 1), out);
|
||||
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
for (i = 0; i < num_repetitions; i++) {
|
||||
unsigned j;
|
||||
|
||||
TEST_NV_CHECK_GOTO(encrypted_memcopy_gpu_to_cpu(gpu,
|
||||
plain_cpu,
|
||||
plain_gpu_address,
|
||||
size,
|
||||
num_rotations_to_insert),
|
||||
out);
|
||||
|
||||
for (j = 0; j < size; j++)
|
||||
TEST_CHECK_GOTO(plain_cpu[j] == 1, out);
|
||||
|
||||
memset(plain_cpu, 0, size);
|
||||
|
||||
}
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Test that CPU decryptions can use old keys i.e. previous versions of the keys
|
||||
// that are no longer the current key, due to key rotation. Given that SEC2
|
||||
// does not expose encryption capabilities, the "decrypt-after-rotation" problem
|
||||
// is exclusive of CE encryptions.
|
||||
static NV_STATUS test_channel_key_rotation_decrypt_after_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
// Instruct encrypted_memcopy_gpu_to_cpu to insert several key rotations
|
||||
// between the GPU encryption, and the associated CPU decryption.
|
||||
unsigned num_rotations_to_insert = 8;
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_cpu_decryption(gpu, 1, num_rotations_to_insert));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_channel_key_rotation(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
break;
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_basic(gpu));
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_interleave(gpu));
|
||||
|
||||
TEST_NV_CHECK_RET(test_channel_key_rotation_decrypt_after_key_rotation(gpu));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@ -1094,7 +1419,7 @@ static NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);
|
||||
|
||||
// Release the semaphore.
|
||||
UVM_WRITE_ONCE(*cpu_ptr, 1);
|
||||
WRITE_ONCE(*cpu_ptr, 1);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
|
||||
|
||||
@ -1203,6 +1528,10 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
status = test_channel_key_rotation(va_space);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// The following tests have side effects, they reset the GPU's
|
||||
// channel_manager.
|
||||
status = test_channel_pushbuffer_extension_base(va_space);
|
||||
@ -1338,6 +1667,126 @@ done:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_cpu_encryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
int i;
|
||||
uvm_channel_pool_t *cpu_to_gpu_pool;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t size = UVM_CONF_COMPUTING_DMA_BUFFER_SIZE;
|
||||
void *initial_plain_cpu = NULL;
|
||||
uvm_mem_t *plain_gpu = NULL;
|
||||
uvm_gpu_address_t plain_gpu_address;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU);
|
||||
|
||||
cpu_to_gpu_pool = gpu->channel_manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_CPU_TO_GPU];
|
||||
TEST_CHECK_RET(uvm_conf_computing_is_key_rotation_enabled_in_pool(cpu_to_gpu_pool));
|
||||
|
||||
initial_plain_cpu = uvm_kvmalloc_zero(size);
|
||||
if (initial_plain_cpu == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_alloc_vidmem(size, gpu, &plain_gpu), out);
|
||||
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(plain_gpu, gpu), out);
|
||||
plain_gpu_address = uvm_mem_gpu_address_virtual_kernel(plain_gpu, gpu);
|
||||
|
||||
memset(initial_plain_cpu, 1, size);
|
||||
|
||||
for (i = 0; i < params->iterations; i++) {
|
||||
TEST_NV_CHECK_GOTO(uvm_conf_computing_util_memcopy_cpu_to_gpu(gpu,
|
||||
plain_gpu_address,
|
||||
initial_plain_cpu,
|
||||
size,
|
||||
NULL,
|
||||
"CPU > GPU"),
|
||||
out);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_mem_free(plain_gpu);
|
||||
uvm_kvfree(initial_plain_cpu);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_cpu_decryption(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
unsigned num_rotations_to_insert = 0;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU);
|
||||
|
||||
return test_channel_key_rotation_cpu_decryption(gpu, params->iterations, num_rotations_to_insert);
|
||||
}
|
||||
|
||||
static NV_STATUS channel_stress_key_rotation_rotate(uvm_gpu_t *gpu, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE);
|
||||
|
||||
for (i = 0; i < params->iterations; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
uvm_channel_type_t type;
|
||||
|
||||
if ((i % 3) == 0)
|
||||
type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
|
||||
else if ((i % 3) == 1)
|
||||
type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
|
||||
else
|
||||
type = UVM_CHANNEL_TYPE_WLC;
|
||||
|
||||
pool = gpu->channel_manager->pool_to_use.default_for_type[type];
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
return NV_ERR_INVALID_STATE;
|
||||
|
||||
status = force_key_rotation(pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// The objective of this test is documented in the user-level function
|
||||
static NV_STATUS uvm_test_channel_stress_key_rotation(uvm_va_space_t *va_space, UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
uvm_test_rng_t rng;
|
||||
uvm_gpu_t *gpu;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
uvm_test_rng_init(&rng, params->seed);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Key rotation should be enabled, or disabled, in all GPUs. Pick a random
|
||||
// one.
|
||||
gpu = random_va_space_gpu(&rng, va_space);
|
||||
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(gpu))
|
||||
goto out;
|
||||
|
||||
if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_CPU_TO_GPU)
|
||||
status = channel_stress_key_rotation_cpu_encryption(gpu, params);
|
||||
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_GPU_TO_CPU)
|
||||
status = channel_stress_key_rotation_cpu_decryption(gpu, params);
|
||||
else if (params->key_rotation_operation == UVM_TEST_CHANNEL_STRESS_KEY_ROTATION_OPERATION_ROTATE)
|
||||
status = channel_stress_key_rotation_rotate(gpu, params);
|
||||
else
|
||||
status = NV_ERR_INVALID_PARAMETER;
|
||||
|
||||
out:
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
@ -1349,6 +1798,8 @@ NV_STATUS uvm_test_channel_stress(UVM_TEST_CHANNEL_STRESS_PARAMS *params, struct
|
||||
return uvm_test_channel_stress_update_channels(va_space, params);
|
||||
case UVM_TEST_CHANNEL_STRESS_MODE_NOOP_PUSH:
|
||||
return uvm_test_channel_noop_push(va_space, params);
|
||||
case UVM_TEST_CHANNEL_STRESS_MODE_KEY_ROTATION:
|
||||
return uvm_test_channel_stress_key_rotation(va_space, params);
|
||||
default:
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
}
|
||||
|
@ -281,29 +281,6 @@ NV_STATUS uvm_spin_loop(uvm_spin_loop_t *spin)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// This formats a GPU UUID, in a UVM-friendly way. That is, nearly the same as
|
||||
// what nvidia-smi reports. It will always prefix the UUID with UVM-GPU so
|
||||
// that we know that we have a real, binary formatted UUID that will work in
|
||||
// the UVM APIs.
|
||||
//
|
||||
// It comes out like this:
|
||||
//
|
||||
// UVM-GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
|
||||
//
|
||||
// This routine will always null-terminate the string for you. This is true
|
||||
// even if the buffer was too small!
|
||||
//
|
||||
// Return value is the number of non-null characters written.
|
||||
//
|
||||
// Note that if you were to let the NV2080_CTRL_CMD_GPU_GET_GID_INFO command
|
||||
// return it's default format, which is ascii, not binary, then you would get
|
||||
// this back:
|
||||
//
|
||||
// GPU-d802726c-df8d-a3c3-ec53-48bdec201c27
|
||||
//
|
||||
// ...which is actually a character string, and won't work for UVM API calls.
|
||||
// So it's very important to be able to see the difference.
|
||||
//
|
||||
static char uvm_digit_to_hex(unsigned value)
|
||||
{
|
||||
if (value >= 10)
|
||||
@ -312,27 +289,19 @@ static char uvm_digit_to_hex(unsigned value)
|
||||
return value + '0';
|
||||
}
|
||||
|
||||
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pUuidStruct)
|
||||
void uvm_uuid_string(char *buffer, const NvProcessorUuid *pUuidStruct)
|
||||
{
|
||||
char *str = buffer+8;
|
||||
char *str = buffer;
|
||||
unsigned i;
|
||||
unsigned dashMask = 1 << 4 | 1 << 6 | 1 << 8 | 1 << 10;
|
||||
|
||||
if (bufferLength < (8 /*prefix*/+ 16 * 2 /*digits*/ + 4 * 1 /*dashes*/ + 1 /*null*/))
|
||||
return *buffer = 0;
|
||||
|
||||
memcpy(buffer, "UVM-GPU-", 8);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] >> 4);
|
||||
*str++ = uvm_digit_to_hex(pUuidStruct->uuid[i] & 0xF);
|
||||
|
||||
if (dashMask & (1 << (i+1)))
|
||||
if (dashMask & (1 << (i + 1)))
|
||||
*str++ = '-';
|
||||
}
|
||||
|
||||
*str = 0;
|
||||
|
||||
return (int)(str-buffer);
|
||||
}
|
||||
|
||||
|
@ -50,9 +50,12 @@ enum {
|
||||
NVIDIA_UVM_NUM_MINOR_DEVICES
|
||||
};
|
||||
|
||||
#define UVM_GPU_UUID_TEXT_BUFFER_LENGTH (8+16*2+4+1)
|
||||
// UUID has the format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
|
||||
#define UVM_UUID_STRING_LENGTH ((8 + 1) + 3 * (4 + 1) + 12 + 1)
|
||||
|
||||
int format_uuid_to_buffer(char *buffer, unsigned bufferLength, const NvProcessorUuid *pGpuUuid);
|
||||
// Writes UVM_UUID_STRING_LENGTH characters into buffer, including a terminating
|
||||
// NULL.
|
||||
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
|
||||
|
||||
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
|
||||
func(prefix "%s:%u %s[pid:%d]" fmt, \
|
||||
@ -98,27 +101,9 @@ bool uvm_debug_prints_enabled(void);
|
||||
#define UVM_INFO_PRINT(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
//
|
||||
// Please see the documentation of format_uuid_to_buffer, for details on what
|
||||
// this routine prints for you.
|
||||
//
|
||||
#define UVM_DBG_PRINT_UUID(msg, uuidPtr) \
|
||||
do { \
|
||||
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
|
||||
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
|
||||
UVM_DBG_PRINT("%s: %s\n", msg, uuidBuffer); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
|
||||
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
|
||||
|
||||
#define UVM_ERR_PRINT_UUID(msg, uuidPtr, ...) \
|
||||
do { \
|
||||
char uuidBuffer[UVM_GPU_UUID_TEXT_BUFFER_LENGTH]; \
|
||||
format_uuid_to_buffer(uuidBuffer, sizeof(uuidBuffer), uuidPtr); \
|
||||
UVM_ERR_PRINT("ERROR: %s : " msg "\n", uuidBuffer, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
|
||||
#define UVM_PANIC_MSG(fmt, ...) UVM_PRINT_FUNC(panic, ": " fmt, ##__VA_ARGS__)
|
||||
|
||||
@ -395,7 +380,7 @@ static inline void uvm_touch_page(struct page *page)
|
||||
UVM_ASSERT(page);
|
||||
|
||||
mapping = (char *) kmap(page);
|
||||
(void)UVM_READ_ONCE(*mapping);
|
||||
(void)READ_ONCE(*mapping);
|
||||
kunmap(page);
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,15 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_va_block.h"
|
||||
|
||||
// Amount of encrypted data on a given engine that triggers key rotation. This
|
||||
// is a UVM internal threshold, different from that of RM, and used only during
|
||||
// testing.
|
||||
//
|
||||
// Key rotation is triggered when the total encryption size, or the total
|
||||
// decryption size (whatever comes first) reaches this lower threshold on the
|
||||
// engine.
|
||||
#define UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD (UVM_SIZE_1MB * 8)
|
||||
|
||||
// The maximum number of secure operations per push is:
|
||||
// UVM_MAX_PUSH_SIZE / min(CE encryption size, CE decryption size)
|
||||
// + 1 (tracking semaphore) = 128 * 1024 / 56 + 1 = 2342
|
||||
@ -352,6 +361,19 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
// The production key rotation defaults are such that key rotations rarely
|
||||
// happen. During UVM testing more frequent rotations are triggering by relying
|
||||
// on internal encryption usage accounting. When key rotations are triggered by
|
||||
// UVM, the driver does not rely on channel key rotation notifiers.
|
||||
//
|
||||
// TODO: Bug 4612912: UVM should be able to programmatically set the rotation
|
||||
// lower threshold. This function, and all the metadata associated with it
|
||||
// (per-pool encryption accounting, for example) can be removed at that point.
|
||||
static bool key_rotation_is_notifier_driven(void)
|
||||
{
|
||||
return !uvm_enable_builtin_tests;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -394,17 +416,35 @@ void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu)
|
||||
conf_computing_dma_buffer_pool_deinit(&gpu->conf_computing.dma_buffer_pool);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
|
||||
else
|
||||
pool = channel->pool;
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
|
||||
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
|
||||
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (!key_rotation_is_notifier_driven())
|
||||
atomic64_add(size, &pool->conf_computing.key_rotation.encrypted);
|
||||
}
|
||||
|
||||
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv)
|
||||
@ -428,27 +468,46 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
void *auth_tag_buffer)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_channel_pool_t *pool;
|
||||
|
||||
UVM_ASSERT(size);
|
||||
|
||||
if (uvm_channel_is_lcic(channel))
|
||||
pool = uvm_channel_lcic_get_paired_wlc(channel)->pool;
|
||||
else
|
||||
pool = channel->pool;
|
||||
|
||||
uvm_mutex_lock(&channel->csl.ctx_lock);
|
||||
|
||||
status = nvUvmInterfaceCslEncrypt(&channel->csl.ctx,
|
||||
size,
|
||||
(NvU8 const *) src_plain,
|
||||
encrypt_iv,
|
||||
(NvU8 *) dst_cipher,
|
||||
(NvU8 *) auth_tag_buffer);
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
// IV rotation is done preemptively as needed, so the above
|
||||
// call cannot return failure.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (uvm_conf_computing_is_key_rotation_enabled_in_pool(pool)) {
|
||||
status = nvUvmInterfaceCslLogEncryption(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, size);
|
||||
|
||||
// Informing RM of an encryption/decryption should not fail
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (!key_rotation_is_notifier_driven())
|
||||
atomic64_add(size, &pool->conf_computing.key_rotation.decrypted);
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const UvmCslIv *src_iv,
|
||||
NvU32 key_version,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer)
|
||||
{
|
||||
@ -469,11 +528,19 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
size,
|
||||
(const NvU8 *) src_cipher,
|
||||
src_iv,
|
||||
NV_U32_MAX,
|
||||
key_version,
|
||||
(NvU8 *) dst_plain,
|
||||
NULL,
|
||||
0,
|
||||
(const NvU8 *) auth_tag_buffer);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("nvUvmInterfaceCslDecrypt() failed: %s, channel %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
channel->name,
|
||||
uvm_gpu_name(uvm_channel_get_gpu(channel)));
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&channel->csl.ctx_lock);
|
||||
|
||||
return status;
|
||||
@ -640,3 +707,231 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
|
||||
{
|
||||
return uvm_conf_computing_rotate_channel_ivs_below_limit(channel, uvm_conf_computing_channel_iv_rotation_limit, true);
|
||||
}
|
||||
|
||||
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
// Key rotation cannot be enabled on UVM if it is disabled on RM
|
||||
if (!gpu->parent->rm_info.gpuConfComputeCaps.bKeyRotationEnabled)
|
||||
return;
|
||||
|
||||
gpu->channel_manager->conf_computing.key_rotation_enabled = true;
|
||||
}
|
||||
|
||||
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (!g_uvm_global.conf_computing_enabled)
|
||||
return;
|
||||
|
||||
gpu->channel_manager->conf_computing.key_rotation_enabled = false;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu)
|
||||
{
|
||||
return gpu->channel_manager->conf_computing.key_rotation_enabled;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled(pool->manager->gpu))
|
||||
return false;
|
||||
|
||||
// TODO: Bug 4586447: key rotation must be disabled in the SEC2 engine,
|
||||
// because currently the encryption key is shared between UVM and RM, but
|
||||
// UVM is not able to idle SEC2 channels owned by RM.
|
||||
if (uvm_channel_pool_is_sec2(pool))
|
||||
return false;
|
||||
|
||||
// Key rotation happens as part of channel reservation, and LCIC channels
|
||||
// are never reserved directly. Rotation of keys in LCIC channels happens
|
||||
// as the result of key rotation in WLC channels.
|
||||
//
|
||||
// Return false even if there is nothing fundamental prohibiting direct key
|
||||
// rotation on LCIC pools
|
||||
if (uvm_channel_pool_is_lcic(pool))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool conf_computing_is_key_rotation_pending_use_stats(uvm_channel_pool_t *pool)
|
||||
{
|
||||
NvU64 decrypted, encrypted;
|
||||
|
||||
UVM_ASSERT(!key_rotation_is_notifier_driven());
|
||||
|
||||
decrypted = atomic64_read(&pool->conf_computing.key_rotation.decrypted);
|
||||
|
||||
if (decrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
|
||||
return true;
|
||||
|
||||
encrypted = atomic64_read(&pool->conf_computing.key_rotation.encrypted);
|
||||
|
||||
if (encrypted > UVM_CONF_COMPUTING_KEY_ROTATION_LOWER_THRESHOLD)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool conf_computing_is_key_rotation_pending_use_notifier(uvm_channel_pool_t *pool)
|
||||
{
|
||||
// If key rotation is pending for the pool's engine, then the key rotation
|
||||
// notifier in any of the engine channels can be used by UVM to detect the
|
||||
// situation. Note that RM doesn't update all the notifiers in a single
|
||||
// atomic operation, so it is possible that the channel read by UVM (the
|
||||
// first one in the pool) indicates that a key rotation is pending, but
|
||||
// another channel in the pool (temporarily) indicates the opposite, or vice
|
||||
// versa.
|
||||
uvm_channel_t *first_channel = pool->channels;
|
||||
|
||||
UVM_ASSERT(key_rotation_is_notifier_driven());
|
||||
UVM_ASSERT(first_channel != NULL);
|
||||
|
||||
return first_channel->channel_info.keyRotationNotifier->status == UVM_KEY_ROTATION_STATUS_PENDING;
|
||||
}
|
||||
|
||||
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (!uvm_conf_computing_is_key_rotation_enabled_in_pool(pool))
|
||||
return false;
|
||||
|
||||
if (key_rotation_is_notifier_driven())
|
||||
return conf_computing_is_key_rotation_pending_use_notifier(pool);
|
||||
else
|
||||
return conf_computing_is_key_rotation_pending_use_stats(pool);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_conf_computing_is_key_rotation_enabled_in_pool(pool));
|
||||
UVM_ASSERT(pool->conf_computing.key_rotation.csl_contexts != NULL);
|
||||
UVM_ASSERT(pool->conf_computing.key_rotation.num_csl_contexts > 0);
|
||||
|
||||
// NV_ERR_STATE_IN_USE indicates that RM was not able to acquire the
|
||||
// required locks at this time. This status is not interpreted as an error,
|
||||
// but as a sign for UVM to try again later. This is the same "protocol"
|
||||
// used in IV rotation.
|
||||
status = nvUvmInterfaceCslRotateKey(pool->conf_computing.key_rotation.csl_contexts,
|
||||
pool->conf_computing.key_rotation.num_csl_contexts);
|
||||
|
||||
if (status == NV_OK) {
|
||||
pool->conf_computing.key_rotation.version++;
|
||||
|
||||
if (!key_rotation_is_notifier_driven()) {
|
||||
atomic64_set(&pool->conf_computing.key_rotation.decrypted, 0);
|
||||
atomic64_set(&pool->conf_computing.key_rotation.encrypted, 0);
|
||||
}
|
||||
}
|
||||
else if (status != NV_ERR_STATE_IN_USE) {
|
||||
UVM_DBG_PRINT("nvUvmInterfaceCslRotateKey() failed in engine %u: %s\n",
|
||||
pool->engine_index,
|
||||
nvstatusToString(status));
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
|
||||
uvm_gpu_address_t dst_gpu_address,
|
||||
void *src_plain,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t src_gpu_address, auth_tag_gpu_address;
|
||||
void *dst_cipher, *auth_tag;
|
||||
va_list args;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
va_start(args, format);
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, tracker, &push, format, args);
|
||||
va_end(args);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
dst_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
uvm_conf_computing_cpu_encrypt(push.channel, dst_cipher, src_plain, NULL, size, auth_tag);
|
||||
|
||||
src_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->decrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_conf_computing_dma_buffer_t *dma_buffer;
|
||||
uvm_gpu_address_t dst_gpu_address, auth_tag_gpu_address;
|
||||
void *src_cipher, *auth_tag;
|
||||
va_list args;
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(size <= UVM_CONF_COMPUTING_DMA_BUFFER_SIZE);
|
||||
|
||||
status = uvm_conf_computing_dma_buffer_alloc(&gpu->conf_computing.dma_buffer_pool, &dma_buffer, NULL);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
va_start(args, format);
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, tracker, &push, format, args);
|
||||
va_end(args);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
uvm_conf_computing_log_gpu_encryption(push.channel, size, dma_buffer->decrypt_iv);
|
||||
dma_buffer->key_version[0] = uvm_channel_pool_key_version(push.channel->pool);
|
||||
|
||||
dst_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->alloc, gpu);
|
||||
auth_tag_gpu_address = uvm_mem_gpu_address_virtual_kernel(dma_buffer->auth_tag, gpu);
|
||||
gpu->parent->ce_hal->encrypt(&push, dst_gpu_address, src_gpu_address, size, auth_tag_gpu_address);
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
src_cipher = uvm_mem_get_cpu_addr_kernel(dma_buffer->alloc);
|
||||
auth_tag = uvm_mem_get_cpu_addr_kernel(dma_buffer->auth_tag);
|
||||
status = uvm_conf_computing_cpu_decrypt(push.channel,
|
||||
dst_plain,
|
||||
src_cipher,
|
||||
dma_buffer->decrypt_iv,
|
||||
dma_buffer->key_version[0],
|
||||
size,
|
||||
auth_tag);
|
||||
|
||||
out:
|
||||
uvm_conf_computing_dma_buffer_free(&gpu->conf_computing.dma_buffer_pool, dma_buffer, NULL);
|
||||
return status;
|
||||
}
|
||||
|
@ -87,9 +87,9 @@ typedef struct
|
||||
// a free buffer.
|
||||
uvm_tracker_t tracker;
|
||||
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, SEC2
|
||||
// writes the authentication tag here. Later when the buffer is decrypted
|
||||
// on the CPU the authentication tag is used again (read) for CSL to verify
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, the
|
||||
// engine (CE or SEC2) writes the authentication tag here. When the buffer
|
||||
// is decrypted on the CPU the authentication tag is used by CSL to verify
|
||||
// the authenticity. The allocation is big enough for one authentication
|
||||
// tag per PAGE_SIZE page in the alloc buffer.
|
||||
uvm_mem_t *auth_tag;
|
||||
@ -98,7 +98,12 @@ typedef struct
|
||||
// to the authentication tag. The allocation is big enough for one IV per
|
||||
// PAGE_SIZE page in the alloc buffer. The granularity between the decrypt
|
||||
// IV and authentication tag must match.
|
||||
UvmCslIv decrypt_iv[(UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE)];
|
||||
UvmCslIv decrypt_iv[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
|
||||
|
||||
// When the DMA buffer is used as the destination of a GPU encryption, the
|
||||
// key version used during GPU encryption of each PAGE_SIZE page can be
|
||||
// saved here, so CPU decryption uses the correct decryption key.
|
||||
NvU32 key_version[UVM_CONF_COMPUTING_DMA_BUFFER_SIZE / PAGE_SIZE];
|
||||
|
||||
// Bitmap of the encrypted pages in the backing allocation
|
||||
uvm_page_mask_t encrypted_page_mask;
|
||||
@ -147,7 +152,7 @@ NV_STATUS uvm_conf_computing_gpu_init(uvm_gpu_t *gpu);
|
||||
void uvm_conf_computing_gpu_deinit(uvm_gpu_t *gpu);
|
||||
|
||||
// Logs encryption information from the GPU and returns the IV.
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, size_t size, UvmCslIv *iv);
|
||||
|
||||
// Acquires next CPU encryption IV and returns it.
|
||||
void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *iv);
|
||||
@ -167,10 +172,14 @@ void uvm_conf_computing_cpu_encrypt(uvm_channel_t *channel,
|
||||
// CPU side decryption helper. Decrypts data from src_cipher and writes the
|
||||
// plain text in dst_plain. src_cipher and dst_plain can't overlap. IV obtained
|
||||
// from uvm_conf_computing_log_gpu_encryption() needs to be be passed to src_iv.
|
||||
//
|
||||
// The caller must indicate which key to use for decryption by passing the
|
||||
// appropiate key version number.
|
||||
NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
|
||||
void *dst_plain,
|
||||
const void *src_cipher,
|
||||
const UvmCslIv *src_iv,
|
||||
NvU32 key_version,
|
||||
size_t size,
|
||||
const void *auth_tag_buffer);
|
||||
|
||||
@ -214,4 +223,71 @@ NV_STATUS uvm_conf_computing_maybe_rotate_channel_ivs_retry_busy(uvm_channel_t *
|
||||
// Check if there are fewer than 'limit' messages available in either direction
|
||||
// and rotate if not.
|
||||
NV_STATUS uvm_conf_computing_rotate_channel_ivs_below_limit(uvm_channel_t *channel, NvU64 limit, bool retry_if_busy);
|
||||
|
||||
// Rotate the engine key associated with the given channel pool.
|
||||
NV_STATUS uvm_conf_computing_rotate_pool_key(uvm_channel_pool_t *pool);
|
||||
|
||||
// Returns true if key rotation is allowed in the channel pool.
|
||||
bool uvm_conf_computing_is_key_rotation_enabled_in_pool(uvm_channel_pool_t *pool);
|
||||
|
||||
// Returns true if key rotation is pending in the channel pool.
|
||||
bool uvm_conf_computing_is_key_rotation_pending_in_pool(uvm_channel_pool_t *pool);
|
||||
|
||||
// Enable/disable key rotation in the passed GPU. Note that UVM enablement is
|
||||
// dependent on RM enablement: key rotation may still be disabled upon calling
|
||||
// this function, if it is disabled in RM. On the other hand, key rotation can
|
||||
// be disabled in UVM, even if it is enabled in RM.
|
||||
//
|
||||
// Enablement/Disablement affects only kernel key rotation in keys owned by UVM.
|
||||
// It doesn't affect user key rotation (CUDA, Video...), nor it affects RM
|
||||
// kernel key rotation.
|
||||
void uvm_conf_computing_enable_key_rotation(uvm_gpu_t *gpu);
|
||||
void uvm_conf_computing_disable_key_rotation(uvm_gpu_t *gpu);
|
||||
|
||||
// Returns true if key rotation is enabled on UVM in the given GPU. Key rotation
|
||||
// can be enabled on the GPU but disabled on some of GPU engines (LCEs or SEC2),
|
||||
// see uvm_conf_computing_is_key_rotation_enabled_in_pool.
|
||||
bool uvm_conf_computing_is_key_rotation_enabled(uvm_gpu_t *gpu);
|
||||
|
||||
// Launch a synchronous, encrypted copy between CPU and GPU.
|
||||
//
|
||||
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
|
||||
//
|
||||
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
|
||||
// text) contents; the function internally performs a CPU-side encryption step
|
||||
// before launching the GPU-side CE decryption. The source buffer can be in
|
||||
// protected or unprotected sysmem, while the destination buffer must be in
|
||||
// protected vidmem.
|
||||
//
|
||||
// The input tracker, if not NULL, is internally acquired by the push
|
||||
// responsible for the encrypted copy.
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_cpu_to_gpu(uvm_gpu_t *gpu,
|
||||
uvm_gpu_address_t dst_gpu_address,
|
||||
void *src_plain,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...);
|
||||
|
||||
// Launch a synchronous, encrypted copy between CPU and GPU.
|
||||
//
|
||||
// The maximum copy size allowed is UVM_CONF_COMPUTING_DMA_BUFFER_SIZE.
|
||||
//
|
||||
// The source CPU buffer pointed by src_plain contains the unencrypted (plain
|
||||
// text) contents; the function internally performs a CPU-side encryption step
|
||||
// before launching the GPU-side CE decryption. The source buffer can be in
|
||||
// protected or unprotected sysmem, while the destination buffer must be in
|
||||
// protected vidmem.
|
||||
//
|
||||
// The input tracker, if not NULL, is internally acquired by the push
|
||||
// responsible for the encrypted copy.
|
||||
__attribute__ ((format(printf, 6, 7)))
|
||||
NV_STATUS uvm_conf_computing_util_memcopy_gpu_to_cpu(uvm_gpu_t *gpu,
|
||||
void *dst_plain,
|
||||
uvm_gpu_address_t src_gpu_address,
|
||||
size_t size,
|
||||
uvm_tracker_t *tracker,
|
||||
const char *format,
|
||||
...);
|
||||
#endif // __UVM_CONF_COMPUTING_H__
|
||||
|
@ -1,53 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
// This file provides simple wrappers that are always built with optimizations
|
||||
// turned on to WAR issues with functions that don't build correctly otherwise.
|
||||
|
||||
#include "uvm_linux.h"
|
||||
|
||||
int nv_atomic_xchg(atomic_t *val, int new)
|
||||
{
|
||||
return atomic_xchg(val, new);
|
||||
}
|
||||
|
||||
int nv_atomic_cmpxchg(atomic_t *val, int old, int new)
|
||||
{
|
||||
return atomic_cmpxchg(val, old, new);
|
||||
}
|
||||
|
||||
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new)
|
||||
{
|
||||
return atomic_long_cmpxchg(val, old, new);
|
||||
}
|
||||
|
||||
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
return copy_from_user(to, from, n);
|
||||
}
|
||||
|
||||
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
return copy_to_user(to, from, n);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -42,7 +42,6 @@ typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
|
||||
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;
|
||||
typedef struct uvm_gpu_semaphore_pool_page_struct uvm_gpu_semaphore_pool_page_t;
|
||||
typedef struct uvm_gpu_peer_struct uvm_gpu_peer_t;
|
||||
typedef struct uvm_mmu_mode_hal_struct uvm_mmu_mode_hal_t;
|
||||
|
||||
typedef struct uvm_channel_manager_struct uvm_channel_manager_t;
|
||||
@ -57,6 +56,12 @@ typedef struct uvm_gpfifo_entry_struct uvm_gpfifo_entry_t;
|
||||
|
||||
typedef struct uvm_va_policy_struct uvm_va_policy_t;
|
||||
typedef struct uvm_va_range_struct uvm_va_range_t;
|
||||
typedef struct uvm_va_range_managed_struct uvm_va_range_managed_t;
|
||||
typedef struct uvm_va_range_external_struct uvm_va_range_external_t;
|
||||
typedef struct uvm_va_range_channel_struct uvm_va_range_channel_t;
|
||||
typedef struct uvm_va_range_sked_reflected_struct uvm_va_range_sked_reflected_t;
|
||||
typedef struct uvm_va_range_semaphore_pool_struct uvm_va_range_semaphore_pool_t;
|
||||
typedef struct uvm_va_range_device_p2p_struct uvm_va_range_device_p2p_t;
|
||||
typedef struct uvm_va_block_struct uvm_va_block_t;
|
||||
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
|
||||
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
|
||||
|
@ -115,8 +115,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
|
||||
TEST_CHECK_RET(skip);
|
||||
|
||||
memory_owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &memory_info->uuid);
|
||||
if (memory_owning_gpu == NULL)
|
||||
memory_owning_gpu = uvm_va_space_get_gpu_by_mem_info(va_space, memory_info);
|
||||
if (!memory_owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
aperture = get_aperture(va_space, memory_owning_gpu, memory_mapping_gpu, memory_info, sli_supported);
|
||||
@ -129,7 +129,8 @@ static NV_STATUS verify_mapping_info(uvm_va_space_t *va_space,
|
||||
phys_offset = mapping_offset;
|
||||
|
||||
// Add the physical offset for nvswitch connected peer mappings
|
||||
if (uvm_aperture_is_peer(aperture) && uvm_gpus_are_nvswitch_connected(memory_mapping_gpu, memory_owning_gpu))
|
||||
if (uvm_aperture_is_peer(aperture) &&
|
||||
uvm_parent_gpus_are_nvswitch_connected(memory_mapping_gpu->parent, memory_owning_gpu->parent))
|
||||
phys_offset += memory_owning_gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
|
||||
for (index = 0; index < ext_mapping_info->numWrittenPtes; index++) {
|
||||
|
@ -412,7 +412,7 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
|
||||
|
||||
UVM_ASSERT(error != NV_OK);
|
||||
|
||||
previous_error = nv_atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
|
||||
previous_error = atomic_cmpxchg(&g_uvm_global.fatal_error, NV_OK, error);
|
||||
|
||||
if (previous_error == NV_OK) {
|
||||
UVM_ERR_PRINT("Encountered a global fatal error: %s\n", nvstatusToString(error));
|
||||
@ -421,6 +421,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
|
||||
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
|
||||
nvstatusToString(error), nvstatusToString(previous_error));
|
||||
}
|
||||
|
||||
nvUvmInterfaceReportFatalError(error);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_global_reset_fatal_error(void)
|
||||
@ -430,7 +432,7 @@ NV_STATUS uvm_global_reset_fatal_error(void)
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
return nv_atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
|
||||
return atomic_xchg(&g_uvm_global.fatal_error, NV_OK);
|
||||
}
|
||||
|
||||
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -52,19 +52,23 @@ struct uvm_global_struct
|
||||
// Created on module load and destroyed on module unload
|
||||
uvmGpuSessionHandle rm_session_handle;
|
||||
|
||||
// peer-to-peer table
|
||||
// peer info is added and removed from this table when usermode
|
||||
// driver calls UvmEnablePeerAccess and UvmDisablePeerAccess
|
||||
// respectively.
|
||||
uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];
|
||||
// Peer-to-peer table for storing parent GPU and MIG instance peer info.
|
||||
// Note that MIG instances can be peers within a single parent GPU or
|
||||
// be peers in different parent GPUs if NVLINK or PCIe peers is enabled.
|
||||
// PCIe and MIG peer info is added and removed from this table when
|
||||
// usermode driver calls UvmEnablePeerAccess() and UvmDisablePeerAccess()
|
||||
// respectively. NvLink and MIG peers are updated when UvmRegisterGpu() and
|
||||
// UvmUnregisterGpu() are called. Peer to peer state for MIG instances
|
||||
// within the same parent GPU are not stored here.
|
||||
uvm_parent_gpu_peer_t parent_gpu_peers[UVM_MAX_UNIQUE_PARENT_GPU_PAIRS];
|
||||
|
||||
// peer-to-peer copy mode
|
||||
// Pascal+ GPUs support virtual addresses in p2p copies.
|
||||
// Ampere+ GPUs add support for physical addresses in p2p copies.
|
||||
uvm_gpu_peer_copy_mode_t peer_copy_mode;
|
||||
|
||||
// Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse to
|
||||
// do most anything other than try and clean up as much as possible.
|
||||
// Stores an NV_STATUS, once it becomes != NV_OK, the driver should refuse
|
||||
// to do most anything other than try and clean up as much as possible.
|
||||
// An example of a fatal error is an unrecoverable ECC error on one of the
|
||||
// GPUs.
|
||||
atomic_t fatal_error;
|
||||
@ -232,12 +236,12 @@ static uvmGpuSessionHandle uvm_global_session_handle(void)
|
||||
// suspended.
|
||||
#define UVM_GPU_WRITE_ONCE(x, val) do { \
|
||||
UVM_ASSERT(!uvm_global_is_suspended()); \
|
||||
UVM_WRITE_ONCE(x, val); \
|
||||
WRITE_ONCE(x, val); \
|
||||
} while (0)
|
||||
|
||||
#define UVM_GPU_READ_ONCE(x) ({ \
|
||||
UVM_ASSERT(!uvm_global_is_suspended()); \
|
||||
UVM_READ_ONCE(x); \
|
||||
READ_ONCE(x); \
|
||||
})
|
||||
|
||||
static bool global_is_fatal_error_assert_disabled(void)
|
||||
@ -384,7 +388,7 @@ static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent
|
||||
(parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
|
||||
|
||||
// LOCKING: Must hold the global_lock
|
||||
#define for_each_gpu_in_parent(parent_gpu, gpu) \
|
||||
#define for_each_gpu_in_parent(gpu, parent_gpu) \
|
||||
for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock); \
|
||||
(gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
|
||||
(gpu) != NULL; \
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -49,9 +49,13 @@
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include "uvm_conf_computing.h"
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
|
||||
UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
|
||||
#define UVM_PARENT_GPU_UUID_PREFIX "GPU-"
|
||||
#define UVM_GPU_UUID_PREFIX "GI-"
|
||||
|
||||
// UVM_UUID_STRING_LENGTH already includes NULL, don't double-count it with
|
||||
// sizeof()
|
||||
#define UVM_PARENT_GPU_UUID_STRING_LENGTH (sizeof(UVM_PARENT_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
|
||||
#define UVM_GPU_UUID_STRING_LENGTH (sizeof(UVM_GPU_UUID_PREFIX) - 1 + UVM_UUID_STRING_LENGTH)
|
||||
|
||||
#define UVM_GPU_MAGIC_VALUE 0xc001d00d12341993ULL
|
||||
|
||||
@ -184,29 +188,45 @@ struct uvm_service_block_context_struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. This is a logical or of read_fault_mask and
|
||||
// write_mask.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
// This is unused for access counter service requests.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE
|
||||
// aligned region of a SAM VMA. Used to return ATS fault status.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. This is used as input for access counter service requests and output
|
||||
// of fault service requests.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
// Mask of successfully serviced read faults on pages in
|
||||
// write_fault_mask.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
} faults;
|
||||
|
||||
struct
|
||||
{
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Mask of successfully migrated pages in a UVM_VA_BLOCK_SIZE
|
||||
// aligned region of a SAM VMA.
|
||||
uvm_page_mask_t migrated_mask;
|
||||
|
||||
} access_counters;
|
||||
};
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@ -633,9 +653,10 @@ struct uvm_gpu_struct
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name in the format:
|
||||
// ID: 999: GPU-<parent_uuid> UVM-GI-<gi_uuid>.
|
||||
// UVM_GPU_UUID_TEXT_BUFFER_LENGTH includes the null character.
|
||||
char name[9 + 2 * UVM_GPU_UUID_TEXT_BUFFER_LENGTH];
|
||||
// ID: 999: GPU-<parent_uuid> GI-<gi_uuid>
|
||||
// UVM_PARENT_GPU_UUID_STRING_LENGTH includes a NULL character but will be
|
||||
// used for a space instead.
|
||||
char name[sizeof("ID: 999: ") - 1 + UVM_PARENT_GPU_UUID_STRING_LENGTH - 1 + 1 + UVM_GPU_UUID_STRING_LENGTH];
|
||||
|
||||
// Refcount of the gpu, i.e. how many times it has been retained. This is
|
||||
// roughly a count of how many times it has been registered with a VA space,
|
||||
@ -682,6 +703,12 @@ struct uvm_gpu_struct
|
||||
bool enabled;
|
||||
unsigned int node_id;
|
||||
} numa;
|
||||
|
||||
// Physical address of the start of statically mapped fb memory in BAR1
|
||||
NvU64 static_bar1_start;
|
||||
|
||||
// Size of statically mapped fb memory in BAR1.
|
||||
NvU64 static_bar1_size;
|
||||
} mem_info;
|
||||
|
||||
struct
|
||||
@ -706,9 +733,6 @@ struct uvm_gpu_struct
|
||||
struct
|
||||
{
|
||||
// Mask of peer_gpus set
|
||||
//
|
||||
// We can use a regular processor id because P2P is not allowed between
|
||||
// partitioned GPUs when SMC is enabled
|
||||
uvm_processor_mask_t peer_gpu_mask;
|
||||
|
||||
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
|
||||
@ -859,16 +883,19 @@ struct uvm_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/"
|
||||
struct proc_dir_entry *dir;
|
||||
|
||||
// "gpus/${gpu_id}" -> "UVM-GPU-${physical-UUID}/${sub_processor_index}"
|
||||
struct proc_dir_entry *dir_symlink;
|
||||
|
||||
// The GPU instance UUID symlink if SMC is enabled.
|
||||
// The GPU instance UUID symlink.
|
||||
// "gpus/UVM-GI-${GI-UUID}" ->
|
||||
// "UVM-GPU-${physical-UUID}/${sub_processor_index}"
|
||||
struct proc_dir_entry *gpu_instance_uuid_symlink;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/${sub_processor_index}/info"
|
||||
struct proc_dir_entry *info_file;
|
||||
|
||||
struct proc_dir_entry *dir_peers;
|
||||
} procfs;
|
||||
|
||||
// Placeholder for per-GPU performance heuristics information
|
||||
@ -876,6 +903,13 @@ struct uvm_gpu_struct
|
||||
|
||||
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
|
||||
bool uvm_test_force_upper_pushbuffer_segment;
|
||||
|
||||
// Have we initialised device p2p pages.
|
||||
bool device_p2p_initialised;
|
||||
|
||||
// Used to protect allocation of p2p_mem and assignment of the page
|
||||
// zone_device_data fields.
|
||||
uvm_mutex_t device_p2p_lock;
|
||||
};
|
||||
|
||||
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
|
||||
@ -905,7 +939,7 @@ struct uvm_parent_gpu_struct
|
||||
NvProcessorUuid uuid;
|
||||
|
||||
// Nice printable name including the uvm gpu id, ascii name from RM and uuid
|
||||
char name[UVM_GPU_NICE_NAME_BUFFER_LENGTH];
|
||||
char name[sizeof("ID 999: : ") - 1 + UVM_GPU_NAME_LENGTH + UVM_PARENT_GPU_UUID_STRING_LENGTH];
|
||||
|
||||
// GPU information and provided by RM (architecture, implementation,
|
||||
// hardware classes, etc.).
|
||||
@ -1087,11 +1121,17 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// "gpus/UVM-GPU-${physical-UUID}/"
|
||||
struct proc_dir_entry *dir;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/fault_stats"
|
||||
struct proc_dir_entry *fault_stats_file;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/access_counters"
|
||||
struct proc_dir_entry *access_counters_file;
|
||||
|
||||
// "gpus/UVM-GPU-${physical-UUID}/peers/"
|
||||
struct proc_dir_entry *dir_peers;
|
||||
} procfs;
|
||||
|
||||
// Interrupt handling state and locks
|
||||
@ -1239,42 +1279,59 @@ static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
|
||||
return gpu->parent->rm_device;
|
||||
}
|
||||
|
||||
struct uvm_gpu_peer_struct
|
||||
typedef struct
|
||||
{
|
||||
// ref_count also controls state maintained in each GPU instance
|
||||
// (uvm_gpu_t). See init_peer_access().
|
||||
NvU64 ref_count;
|
||||
} uvm_gpu_peer_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// The fields in this global structure can only be inspected under one of
|
||||
// the following conditions:
|
||||
//
|
||||
// - The VA space lock is held for either read or write, both GPUs are
|
||||
// registered in the VA space, and the corresponding bit in the
|
||||
// - The VA space lock is held for either read or write, both parent GPUs
|
||||
// are registered in the VA space, and the corresponding bit in the
|
||||
// va_space.enabled_peers bitmap is set.
|
||||
//
|
||||
// - The global lock is held.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be SMC peers and were both retained.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be NVLINK peers and were both retained.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// detected to be NVLINK peers and were both retained.
|
||||
//
|
||||
// - While the global lock was held in the past, the two GPUs were detected
|
||||
// to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was called.
|
||||
// - While the global lock was held in the past, the two parent GPUs were
|
||||
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
|
||||
// called.
|
||||
//
|
||||
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
|
||||
// GPU must be read from the original GPU's peer_gpus table. The fields
|
||||
// will not change while the lock is held, but they may no longer be valid
|
||||
// because the other GPU might be in teardown.
|
||||
|
||||
// Peer Id associated with this device w.r.t. to a peer GPU.
|
||||
// This field is used to determine when this struct has been initialized
|
||||
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
|
||||
// PCIe peers are initialized when retain_pcie_peers_from_uuids() is called.
|
||||
NvU64 ref_count;
|
||||
|
||||
// Saved values from UvmGpuP2PCapsParams to be used after GPU instance
|
||||
// creation. This should be per GPU instance since LCEs are associated with
|
||||
// GPU instances, not parent GPUs, but for now MIG is not supported for
|
||||
// NVLINK peers so RM associates this state with the parent GPUs. This will
|
||||
// need to be revisited if that NVLINK MIG peer support is added.
|
||||
NvU8 optimalNvlinkWriteCEs[2];
|
||||
|
||||
// Peer Id associated with this device with respect to a peer parent GPU.
|
||||
// Note: peerId (A -> B) != peerId (B -> A)
|
||||
// peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
|
||||
// peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
|
||||
NvU8 peer_ids[2];
|
||||
|
||||
// The link type between the peer GPUs, currently either PCIe or NVLINK.
|
||||
// This field is used to determine the when this peer struct has been
|
||||
// initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
|
||||
// initialized at GPU registration time. PCIe peers are initialized when
|
||||
// the refcount below goes from 0 to 1.
|
||||
// The link type between the peer parent GPUs, currently either PCIe or
|
||||
// NVLINK.
|
||||
uvm_gpu_link_type_t link_type;
|
||||
|
||||
// Maximum unidirectional bandwidth between the peers in megabytes per
|
||||
@ -1282,10 +1339,6 @@ struct uvm_gpu_peer_struct
|
||||
// See UvmGpuP2PCapsParams.
|
||||
NvU32 total_link_line_rate_mbyte_per_s;
|
||||
|
||||
// For PCIe, the number of times that this has been retained by a VA space.
|
||||
// For NVLINK this will always be 1.
|
||||
NvU64 ref_count;
|
||||
|
||||
// This handle gets populated when enable_peer_access successfully creates
|
||||
// an NV50_P2P object. disable_peer_access resets the same on the object
|
||||
// deletion.
|
||||
@ -1299,9 +1352,13 @@ struct uvm_gpu_peer_struct
|
||||
// GPU-A <-> GPU-B link is bidirectional, pairs[x][0] is always the
|
||||
// local GPU, while pairs[x][1] is the remote GPU. The table shall be
|
||||
// filled like so: [[GPU-A, GPU-B], [GPU-B, GPU-A]].
|
||||
uvm_gpu_t *pairs[2][2];
|
||||
uvm_parent_gpu_t *pairs[2][2];
|
||||
} procfs;
|
||||
};
|
||||
|
||||
// Peer-to-peer state for MIG instance pairs between two different parent
|
||||
// GPUs.
|
||||
uvm_gpu_peer_t gpu_peers[UVM_MAX_UNIQUE_SUB_PROCESSOR_PAIRS];
|
||||
} uvm_parent_gpu_peer_t;
|
||||
|
||||
// Initialize global gpu state
|
||||
NV_STATUS uvm_gpu_init(void);
|
||||
@ -1380,12 +1437,12 @@ static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
|
||||
return atomic64_read(&gpu->retained_count);
|
||||
}
|
||||
|
||||
// Decrease the refcount on the parent GPU object, and actually delete the object
|
||||
// if the refcount hits zero.
|
||||
// Decrease the refcount on the parent GPU object, and actually delete the
|
||||
// object if the refcount hits zero.
|
||||
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);
|
||||
|
||||
// Calculates peer table index using GPU ids.
|
||||
NvU32 uvm_gpu_peer_table_index(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
|
||||
// Returns a GPU peer pair index in the range [0 .. UVM_MAX_UNIQUE_GPU_PAIRS).
|
||||
NvU32 uvm_gpu_pair_index(const uvm_gpu_id_t id0, const uvm_gpu_id_t id1);
|
||||
|
||||
// Either retains an existing PCIe peer entry or creates a new one. In both
|
||||
// cases the two GPUs are also each retained.
|
||||
@ -1396,35 +1453,26 @@ NV_STATUS uvm_gpu_retain_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
// LOCKING: requires the global lock to be held
|
||||
void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
|
||||
uvm_gpu_link_type_t uvm_parent_gpu_peer_link_type(uvm_parent_gpu_t *parent_gpu0, uvm_parent_gpu_t *parent_gpu1);
|
||||
|
||||
// Get the aperture for local_gpu to use to map memory resident on remote_gpu.
|
||||
// They must not be the same gpu.
|
||||
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
|
||||
|
||||
// Return the reference count for the P2P state between the given GPUs.
|
||||
// The two GPUs must have different parents.
|
||||
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
|
||||
|
||||
// Get the processor id accessible by the given GPU for the given physical
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
|
||||
// Get the P2P capabilities between the gpus with the given indexes
|
||||
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(const uvm_gpu_id_t gpu_id0, const uvm_gpu_id_t gpu_id1);
|
||||
bool uvm_parent_gpus_are_nvswitch_connected(const uvm_parent_gpu_t *parent_gpu0, const uvm_parent_gpu_t *parent_gpu1);
|
||||
|
||||
// Get the P2P capabilities between the given gpus
|
||||
static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
static bool uvm_gpus_are_smc_peers(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
{
|
||||
return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
|
||||
}
|
||||
UVM_ASSERT(gpu0 != gpu1);
|
||||
|
||||
static bool uvm_gpus_are_nvswitch_connected(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
|
||||
{
|
||||
if (gpu0->parent->nvswitch_info.is_nvswitch_connected && gpu1->parent->nvswitch_info.is_nvswitch_connected) {
|
||||
UVM_ASSERT(uvm_gpu_peer_caps(gpu0, gpu1)->link_type >= UVM_GPU_LINK_NVLINK_2);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool uvm_gpus_are_smc_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
{
|
||||
return gpu0->parent == gpu1->parent;
|
||||
}
|
||||
|
||||
@ -1595,9 +1643,6 @@ static bool uvm_parent_gpu_needs_proxy_channel_pool(const uvm_parent_gpu_t *pare
|
||||
|
||||
uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Debug print of GPU properties
|
||||
void uvm_gpu_print(uvm_gpu_t *gpu);
|
||||
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The
|
||||
// bottom half GPU page fault handler uses this to look up the VA space for GPU
|
||||
// faults.
|
||||
@ -1637,4 +1682,7 @@ typedef enum
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
} uvm_gpu_buffer_flush_mode_t;
|
||||
|
||||
// PCIe BAR containing static framebuffer memory mappings for PCIe P2P
|
||||
int uvm_device_p2p_static_bar(uvm_gpu_t *gpu);
|
||||
|
||||
#endif // __UVM_GPU_H__
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "nv_uvm_interface.h"
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
@ -43,8 +44,9 @@
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x1
|
||||
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x2
|
||||
#define UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x4
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
@ -600,7 +602,7 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = NV_ERR_INVALID_DEVICE;
|
||||
status = NV_OK;
|
||||
}
|
||||
else {
|
||||
UvmGpuAccessCntrConfig default_config =
|
||||
@ -684,7 +686,10 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
while (get != put) {
|
||||
// Wait until valid bit is set
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
if (uvm_global_get_status() != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
|
||||
++get;
|
||||
@ -692,6 +697,7 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
get = 0;
|
||||
}
|
||||
|
||||
done:
|
||||
write_get(parent_gpu, get);
|
||||
}
|
||||
|
||||
@ -830,12 +836,18 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
(fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = ¬ification_cache[notification_index];
|
||||
|
||||
// We cannot just wait for the last entry (the one pointed by put) to become valid, we have to do it
|
||||
// individually since entries can be written out of order
|
||||
// We cannot just wait for the last entry (the one pointed by put) to
|
||||
// become valid, we have to do it individually since entries can be
|
||||
// written out of order
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
|
||||
goto done;
|
||||
|
||||
// There's no entry to work on and something has gone wrong. Ignore
|
||||
// the rest.
|
||||
if (uvm_global_get_status() != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Prevent later accesses being moved above the read of the valid bit
|
||||
@ -991,7 +1003,9 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
|
||||
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
|
||||
UVM_ASSERT(!(flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR));
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR)
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
|
||||
return status;
|
||||
@ -999,9 +1013,11 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
uvm_gpu_t *gpu,
|
||||
NvU64 base,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
NvU32 flags,
|
||||
uvm_page_mask_t *migrated_mask)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -1016,8 +1032,39 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR) {
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(base);
|
||||
UVM_ASSERT(migrated_mask);
|
||||
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
NvU32 start_index = i;
|
||||
NvU32 end_index;
|
||||
|
||||
for (end_index = i; end_index < num_entries; end_index++) {
|
||||
NvU32 mask_index = (notification_start[end_index]->address.address - base) / PAGE_SIZE;
|
||||
|
||||
if (!uvm_page_mask_test(migrated_mask, mask_index))
|
||||
break;
|
||||
}
|
||||
|
||||
if (end_index > start_index) {
|
||||
status = access_counter_clear_notifications(gpu,
|
||||
¬ification_start[start_index],
|
||||
end_index - start_index);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
i = end_index;
|
||||
}
|
||||
}
|
||||
else if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) {
|
||||
UVM_ASSERT(!base);
|
||||
UVM_ASSERT(!migrated_mask);
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -1242,7 +1289,7 @@ static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_c
|
||||
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
|
||||
gpu->id: UVM_ID_CPU;
|
||||
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
UVM_ASSERT(num_reverse_mappings > 0);
|
||||
|
||||
@ -1304,7 +1351,7 @@ static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_c
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
}
|
||||
|
||||
done:
|
||||
@ -1329,7 +1376,7 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
NvU32 out_flags_local = 0;
|
||||
@ -1341,7 +1388,7 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
|
||||
*out_flags |= out_flags_local;
|
||||
}
|
||||
|
||||
@ -1473,7 +1520,7 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
|
||||
resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
|
||||
UVM_ASSERT(resident_gpu != NULL);
|
||||
|
||||
if (gpu != resident_gpu && uvm_gpus_are_nvswitch_connected(gpu, resident_gpu)) {
|
||||
if (gpu != resident_gpu && uvm_parent_gpus_are_nvswitch_connected(gpu->parent, resident_gpu->parent)) {
|
||||
UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
|
||||
address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
}
|
||||
@ -1499,7 +1546,7 @@ static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_cont
|
||||
&out_flags_local);
|
||||
total_reverse_mappings += num_reverse_mappings;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
|
||||
flags |= out_flags_local;
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -1610,7 +1657,7 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return;
|
||||
|
||||
if (UVM_ID_IS_GPU(resident_id))
|
||||
resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
|
||||
resident_gpu = uvm_gpu_get(resident_id);
|
||||
|
||||
if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
|
||||
uvm_page_mask_set(accessed_pages, page_index);
|
||||
@ -1692,9 +1739,15 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(va_space, gpu, ¬ifications[index], *out_index - index, flags);
|
||||
flags_status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
0,
|
||||
¬ifications[index],
|
||||
*out_index - index,
|
||||
flags,
|
||||
NULL);
|
||||
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
@ -1713,7 +1766,6 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 base;
|
||||
NvU64 end;
|
||||
NvU64 address;
|
||||
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
@ -1733,7 +1785,13 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (!vma) {
|
||||
// Clear the notification entry to continue receiving access counter
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(va_space, gpu, ¬ifications[index], 1, flags);
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
0,
|
||||
¬ifications[index],
|
||||
1,
|
||||
UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR,
|
||||
NULL);
|
||||
*out_index = index + 1;
|
||||
return status;
|
||||
}
|
||||
@ -1741,7 +1799,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->accessed_mask);
|
||||
uvm_page_mask_zero(&ats_context->access_counters.accessed_mask);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
@ -1750,7 +1808,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
|
||||
break;
|
||||
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
uvm_page_mask_set(&ats_context->access_counters.accessed_mask, (address - base) / PAGE_SIZE);
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
@ -1762,10 +1820,15 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// location is set
|
||||
// If no pages were actually migrated, don't clear the access counters.
|
||||
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(va_space, gpu, ¬ifications[index], *out_index - index, flags);
|
||||
flags_status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
base,
|
||||
¬ifications[index],
|
||||
*out_index - index,
|
||||
UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR,
|
||||
&ats_context->access_counters.migrated_mask);
|
||||
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
@ -1799,25 +1862,32 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
// Avoid clearing the entry by default.
|
||||
NvU32 flags = 0;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
uvm_va_range_managed_t *managed_range = uvm_va_range_to_managed_or_null(va_range);
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
size_t index = uvm_va_range_block_index(va_range, address);
|
||||
if (managed_range) {
|
||||
size_t index = uvm_va_range_block_index(managed_range, address);
|
||||
|
||||
va_block = uvm_va_range_block(va_range, index);
|
||||
va_block = uvm_va_range_block(managed_range, index);
|
||||
|
||||
// If the va_range is a managed range, the notification belongs to a
|
||||
// recently freed va_range if va_block is NULL. If va_block is not
|
||||
// NULL, service_virt_notifications_in_block will process flags.
|
||||
// Clear the notification entry to continue receiving notifications
|
||||
// when a new va_range is allocated in that region.
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
}
|
||||
|
||||
if (va_block) {
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(va_space, gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu_va_space->gpu,
|
||||
0,
|
||||
batch_context->virt.notifications,
|
||||
1,
|
||||
flags,
|
||||
NULL);
|
||||
*out_index = index + 1;
|
||||
}
|
||||
}
|
||||
@ -1839,7 +1909,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
// - If the va_block isn't HMM, the notification belongs to a recently
|
||||
// freed va_range. Clear the notification entry to continue receiving
|
||||
// notifications when a new va_range is allocated in this region.
|
||||
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
|
||||
(status == NV_ERR_INVALID_ADDRESS) ||
|
||||
@ -1849,9 +1919,11 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu_va_space->gpu,
|
||||
0,
|
||||
batch_context->virt.notifications,
|
||||
1,
|
||||
flags);
|
||||
flags,
|
||||
NULL);
|
||||
|
||||
*out_index = index + 1;
|
||||
}
|
||||
@ -1917,9 +1989,11 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
else {
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
current_entry->gpu,
|
||||
0,
|
||||
&batch_context->virt.notifications[i],
|
||||
1,
|
||||
0);
|
||||
0,
|
||||
NULL);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
@ -1979,6 +2053,64 @@ void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_parent_gpu_t *parent_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_processor_mask_t *retained_gpus;
|
||||
|
||||
retained_gpus = uvm_processor_mask_cache_alloc();
|
||||
if (!retained_gpus)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_processor_mask_zero(retained_gpus);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
|
||||
if (gpu->parent == parent_gpu)
|
||||
continue;
|
||||
|
||||
uvm_gpu_retain(gpu);
|
||||
uvm_processor_mask_set(retained_gpus, gpu->id);
|
||||
parent_gpu = gpu->parent;
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
for_each_gpu_in_mask(gpu, retained_gpus) {
|
||||
|
||||
if (!gpu->parent->access_counters_supported)
|
||||
continue;
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
// Access counters not enabled. Nothing to clear
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count) {
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
|
||||
status = access_counter_clear_all(gpu);
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
for_each_gpu_in_mask(gpu, retained_gpus)
|
||||
uvm_gpu_release(gpu);
|
||||
|
||||
uvm_processor_mask_cache_free(retained_gpus);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static const NvU32 g_uvm_access_counters_threshold_max = (1 << 15) - 1;
|
||||
|
||||
static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,
|
||||
|
@ -579,8 +579,9 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_t fault_access_type = fault_entry->fault_access_type;
|
||||
uvm_ats_fault_context_t *ats_context = &non_replayable_faults->ats_context;
|
||||
|
||||
uvm_page_mask_zero(&ats_context->read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.accessed_mask);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
|
||||
|
||||
@ -597,14 +598,17 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
else {
|
||||
NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(fault_address);
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
|
||||
uvm_page_index_t page_index = (fault_address - base) / PAGE_SIZE;
|
||||
uvm_page_mask_t *fault_mask = (fault_access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) ?
|
||||
&ats_context->write_fault_mask :
|
||||
&ats_context->read_fault_mask;
|
||||
&ats_context->faults.write_fault_mask :
|
||||
&ats_context->faults.read_fault_mask;
|
||||
|
||||
uvm_page_mask_set(fault_mask, page_index);
|
||||
|
||||
uvm_page_mask_set(accessed_mask, page_index);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, ats_context);
|
||||
if (status == NV_OK) {
|
||||
// Invalidate ATS TLB entries if needed
|
||||
|
@ -644,7 +644,15 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
while (get != put) {
|
||||
// Wait until valid bit is set
|
||||
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
|
||||
UVM_SPIN_WHILE(!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
// Channels might be idle (e.g. in teardown) so check for errors
|
||||
// actively. In that case the gpu pointer is valid.
|
||||
status = gpu ? uvm_channel_manager_check_errors(gpu->channel_manager) : uvm_global_get_status();
|
||||
if (status != NV_OK) {
|
||||
write_get(parent_gpu, get);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
fault_buffer_skip_replayable_entry(parent_gpu, get);
|
||||
++get;
|
||||
@ -890,6 +898,10 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
|
||||
goto done;
|
||||
|
||||
status = uvm_global_get_status();
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Prevent later accesses being moved above the read of the valid bit
|
||||
@ -1410,7 +1422,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
&end);
|
||||
}
|
||||
else {
|
||||
policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
policy = &va_block->managed_range->policy;
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
@ -1689,11 +1701,11 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
NvU32 i;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
const uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
const uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->faults.reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults.faults_serviced_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->faults.accessed_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
@ -1763,8 +1775,8 @@ static void start_new_sub_batch(NvU64 *sub_batch_base,
|
||||
NvU32 fault_index,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_page_mask_zero(&ats_context->read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->write_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.read_fault_mask);
|
||||
uvm_page_mask_zero(&ats_context->faults.write_fault_mask);
|
||||
|
||||
*sub_batch_fault_index = fault_index;
|
||||
*sub_batch_base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
@ -1784,8 +1796,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_buffer_entry_t *previous_entry = NULL;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->faults.read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->faults.write_fault_mask;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
bool replay_per_va_block =
|
||||
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
|
@ -507,11 +507,12 @@ uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semapho
|
||||
return uvm_gpu_address_virtual_unprotected(encrypted_base_va + semaphore->index * UVM_SEMAPHORE_SIZE);
|
||||
}
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
char *notifier_base_va = uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
|
||||
uvm_gpu_semaphore_notifier_t *notifier_base_va =
|
||||
uvm_rm_mem_get_cpu_va(semaphore->page->conf_computing.notifier_memory);
|
||||
|
||||
return (NvU32*)(notifier_base_va + semaphore->index * sizeof(NvU32));
|
||||
return notifier_base_va + semaphore->index;
|
||||
}
|
||||
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
@ -519,7 +520,8 @@ uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *sem
|
||||
NvU64 notifier_base_va = uvm_rm_mem_get_gpu_uvm_va(semaphore->page->conf_computing.notifier_memory,
|
||||
semaphore->page->pool->gpu);
|
||||
|
||||
return uvm_gpu_address_virtual_unprotected(notifier_base_va + semaphore->index * sizeof(NvU32));
|
||||
return uvm_gpu_address_virtual_unprotected(notifier_base_va +
|
||||
semaphore->index * sizeof(uvm_gpu_semaphore_notifier_t));
|
||||
}
|
||||
|
||||
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore)
|
||||
@ -622,22 +624,11 @@ void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem)
|
||||
uvm_gpu_semaphore_free(&tracking_sem->semaphore);
|
||||
}
|
||||
|
||||
static bool should_skip_secure_semaphore_update(NvU32 last_observed_notifier, NvU32 gpu_notifier)
|
||||
static void gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
// No new value, or the GPU is currently writing the new encrypted material
|
||||
// and no change in value would still result in corrupted data.
|
||||
return (last_observed_notifier == gpu_notifier) || (gpu_notifier % 2);
|
||||
}
|
||||
|
||||
static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, uvm_gpu_semaphore_t *semaphore)
|
||||
{
|
||||
UvmCslIv local_iv;
|
||||
NvU32 local_payload;
|
||||
NvU32 new_sem_value;
|
||||
NvU32 gpu_notifier;
|
||||
NvU32 last_observed_notifier;
|
||||
NvU32 new_gpu_notifier = 0;
|
||||
NvU32 iv_index = 0;
|
||||
uvm_gpu_semaphore_notifier_t gpu_notifier;
|
||||
uvm_gpu_semaphore_notifier_t new_gpu_notifier = 0;
|
||||
|
||||
// A channel can have multiple entries pending and the tracking semaphore
|
||||
// update of each entry can race with this function. Since the semaphore
|
||||
@ -646,62 +637,72 @@ static void uvm_gpu_semaphore_encrypted_payload_update(uvm_channel_t *channel, u
|
||||
unsigned tries_left = channel->num_gpfifo_entries;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU8 local_auth_tag[UVM_CONF_COMPUTING_AUTH_TAG_SIZE];
|
||||
UvmCslIv *ivs_cpu_addr = semaphore->conf_computing.ivs;
|
||||
NvU32 *gpu_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
|
||||
uvm_gpu_semaphore_notifier_t *semaphore_notifier_cpu_addr = uvm_gpu_semaphore_get_notifier_cpu_va(semaphore);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
UVM_ASSERT(uvm_channel_is_ce(channel));
|
||||
|
||||
last_observed_notifier = semaphore->conf_computing.last_observed_notifier;
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
UVM_ASSERT(last_observed_notifier <= gpu_notifier);
|
||||
|
||||
if (should_skip_secure_semaphore_update(last_observed_notifier, gpu_notifier))
|
||||
return;
|
||||
|
||||
do {
|
||||
gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
gpu_notifier = READ_ONCE(*semaphore_notifier_cpu_addr);
|
||||
|
||||
UVM_ASSERT(gpu_notifier >= semaphore->conf_computing.last_observed_notifier);
|
||||
|
||||
// Odd notifier value means there's an update in progress.
|
||||
if (gpu_notifier % 2)
|
||||
continue;
|
||||
|
||||
// There's no change since last time
|
||||
if (gpu_notifier == semaphore->conf_computing.last_observed_notifier)
|
||||
return;
|
||||
|
||||
// Make sure no memory accesses happen before we read the notifier
|
||||
smp_mb__after_atomic();
|
||||
|
||||
iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
memcpy(local_auth_tag, uvm_gpu_semaphore_get_auth_tag_cpu_va(semaphore), sizeof(local_auth_tag));
|
||||
local_payload = UVM_READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
|
||||
memcpy(&local_iv, &ivs_cpu_addr[iv_index], sizeof(local_iv));
|
||||
local_payload = READ_ONCE(*uvm_gpu_semaphore_get_encrypted_payload_cpu_va(semaphore));
|
||||
|
||||
// Make sure the second read of notifier happens after
|
||||
// all memory accesses.
|
||||
smp_mb__before_atomic();
|
||||
new_gpu_notifier = UVM_READ_ONCE(*gpu_notifier_cpu_addr);
|
||||
new_gpu_notifier = READ_ONCE(*semaphore_notifier_cpu_addr);
|
||||
tries_left--;
|
||||
} while ((tries_left > 0) && ((gpu_notifier != new_gpu_notifier) || (gpu_notifier % 2)));
|
||||
|
||||
if (!tries_left) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto error;
|
||||
}
|
||||
else {
|
||||
NvU32 key_version;
|
||||
const NvU32 iv_index = (gpu_notifier / 2) % channel->num_gpfifo_entries;
|
||||
NvU32 new_semaphore_value;
|
||||
|
||||
UVM_ASSERT(gpu_notifier == new_gpu_notifier);
|
||||
UVM_ASSERT(gpu_notifier % 2 == 0);
|
||||
|
||||
// CPU decryption is guaranteed to use the same key version as the
|
||||
// associated GPU encryption, because if there was any key rotation in
|
||||
// between, then key rotation waited for all channels to complete before
|
||||
// proceeding. The wait implies that the semaphore value matches the
|
||||
// last one encrypted on the GPU, so this CPU decryption should happen
|
||||
// before the key is rotated.
|
||||
key_version = uvm_channel_pool_key_version(channel->pool);
|
||||
|
||||
if (gpu_notifier == new_gpu_notifier) {
|
||||
status = uvm_conf_computing_cpu_decrypt(channel,
|
||||
&new_sem_value,
|
||||
&new_semaphore_value,
|
||||
&local_payload,
|
||||
&local_iv,
|
||||
sizeof(new_sem_value),
|
||||
&semaphore->conf_computing.ivs[iv_index],
|
||||
key_version,
|
||||
sizeof(new_semaphore_value),
|
||||
&local_auth_tag);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
uvm_gpu_semaphore_set_payload(semaphore, new_sem_value);
|
||||
UVM_WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
|
||||
}
|
||||
uvm_gpu_semaphore_set_payload(semaphore, new_semaphore_value);
|
||||
WRITE_ONCE(semaphore->conf_computing.last_observed_notifier, new_gpu_notifier);
|
||||
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
error:
|
||||
// Decryption failure is a fatal error as well as running out of try left.
|
||||
@ -728,7 +729,7 @@ static NvU64 update_completed_value_locked(uvm_gpu_tracking_semaphore_t *trackin
|
||||
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore
|
||||
// mechanism to all semaphore
|
||||
uvm_channel_t *channel = container_of(tracking_semaphore, uvm_channel_t, tracking_sem);
|
||||
uvm_gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
|
||||
gpu_semaphore_encrypted_payload_update(channel, &tracking_semaphore->semaphore);
|
||||
}
|
||||
|
||||
new_sem_value = uvm_gpu_semaphore_get_payload(&tracking_semaphore->semaphore);
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "uvm_rm_mem.h"
|
||||
#include "uvm_linux.h"
|
||||
|
||||
typedef NvU32 uvm_gpu_semaphore_notifier_t;
|
||||
|
||||
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
|
||||
// that's used for synchronization among them.
|
||||
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
|
||||
@ -52,8 +54,8 @@ struct uvm_gpu_semaphore_struct
|
||||
UvmCslIv *ivs;
|
||||
NvU32 cached_payload;
|
||||
|
||||
NvU32 last_pushed_notifier;
|
||||
NvU32 last_observed_notifier;
|
||||
uvm_gpu_semaphore_notifier_t last_pushed_notifier;
|
||||
uvm_gpu_semaphore_notifier_t last_observed_notifier;
|
||||
} conf_computing;
|
||||
};
|
||||
|
||||
@ -154,7 +156,7 @@ NvU32 *uvm_gpu_semaphore_get_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
NvU32 *uvm_gpu_semaphore_get_encrypted_payload_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_encrypted_payload_gpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
NvU32 *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_semaphore_notifier_t *uvm_gpu_semaphore_get_notifier_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
uvm_gpu_address_t uvm_gpu_semaphore_get_notifier_gpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
||||
void *uvm_gpu_semaphore_get_auth_tag_cpu_va(uvm_gpu_semaphore_t *semaphore);
|
||||
|
@ -73,6 +73,24 @@ module_param(uvm_disable_hmm, bool, 0444);
|
||||
#include "uvm_va_policy.h"
|
||||
#include "uvm_tools.h"
|
||||
|
||||
// The function nv_PageSwapCache() wraps the check for page swap cache flag in
|
||||
// order to support a wide variety of kernel versions.
|
||||
// The function PageSwapCache() is removed after 32f51ead3d77 ("mm: remove
|
||||
// PageSwapCache") in v6.12-rc1.
|
||||
// The function folio_test_swapcache() was added in Linux 5.16 (d389a4a811551
|
||||
// "mm: Add folio flag manipulation functions")
|
||||
// Systems with HMM patches backported to 5.14 are possible, but those systems
|
||||
// do not include folio_test_swapcache()
|
||||
// TODO: Bug 4050579: Remove this when migration of swap cached pages is updated
|
||||
static __always_inline bool nv_PageSwapCache(struct page *page)
|
||||
{
|
||||
#if defined(NV_FOLIO_TEST_SWAPCACHE_PRESENT)
|
||||
return folio_test_swapcache(page_folio(page));
|
||||
#else
|
||||
return PageSwapCache(page);
|
||||
#endif
|
||||
}
|
||||
|
||||
static NV_STATUS gpu_chunk_add(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
struct page *page);
|
||||
@ -853,7 +871,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
status = uvm_va_block_split_locked(va_block, new_end, new_va_block, NULL);
|
||||
status = uvm_va_block_split_locked(va_block, new_end, new_va_block);
|
||||
if (status != NV_OK)
|
||||
goto err;
|
||||
|
||||
@ -1351,7 +1369,7 @@ void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_processor_mask_andnot(map_processors, &va_block->evicted_gpus, &node->policy.accessed_by);
|
||||
|
||||
for_each_gpu_id_in_mask(id, map_processors) {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, id);
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(id);
|
||||
uvm_va_block_gpu_state_t *gpu_state;
|
||||
|
||||
if (!gpu->parent->access_counters_supported)
|
||||
@ -1981,7 +1999,7 @@ static void fill_dst_pfns(uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *same_devmem_page_mask,
|
||||
uvm_processor_id_t dest_id)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_block->hmm.va_space, dest_id);
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(dest_id);
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
uvm_page_mask_zero(same_devmem_page_mask);
|
||||
@ -2694,7 +2712,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (PageSwapCache(src_page)) {
|
||||
if (nv_PageSwapCache(src_page)) {
|
||||
// TODO: Bug 4050579: Remove this when swap cached pages can be
|
||||
// migrated.
|
||||
status = NV_WARN_MISMATCHED_TARGET;
|
||||
@ -3512,17 +3530,17 @@ NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
|
||||
*endp = end;
|
||||
|
||||
if (params) {
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->resident_on[0], UVM_ID_CPU);
|
||||
uvm_processor_get_uuid(UVM_ID_CPU, ¶ms->resident_on[0]);
|
||||
params->resident_physical_size[0] = PAGE_SIZE;
|
||||
params->resident_on_count = 1;
|
||||
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->mapped_on[0], UVM_ID_CPU);
|
||||
uvm_processor_get_uuid(UVM_ID_CPU, ¶ms->mapped_on[0]);
|
||||
params->mapping_type[0] = (vma->vm_flags & VM_WRITE) ?
|
||||
UVM_PROT_READ_WRITE_ATOMIC : UVM_PROT_READ_ONLY;
|
||||
params->page_size[0] = PAGE_SIZE;
|
||||
params->mapped_on_count = 1;
|
||||
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->populated_on[0], UVM_ID_CPU);
|
||||
uvm_processor_get_uuid(UVM_ID_CPU, ¶ms->populated_on[0]);
|
||||
params->populated_on_count = 1;
|
||||
}
|
||||
|
||||
@ -3676,12 +3694,12 @@ NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
params->read_duplication = node->policy.read_duplication;
|
||||
|
||||
if (!UVM_ID_IS_INVALID(node->policy.preferred_location)) {
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->preferred_location, node->policy.preferred_location);
|
||||
uvm_processor_get_uuid(node->policy.preferred_location, ¶ms->preferred_location);
|
||||
params->preferred_cpu_nid = node->policy.preferred_nid;
|
||||
}
|
||||
|
||||
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->accessed_by[params->accessed_by_count++], processor_id);
|
||||
uvm_processor_get_uuid(processor_id, ¶ms->accessed_by[params->accessed_by_count++]);
|
||||
}
|
||||
else {
|
||||
uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree, params->lookup_address,
|
||||
|
@ -186,19 +186,19 @@ static NV_STATUS test_semaphore_acquire(uvm_gpu_t *gpu)
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Wait for sema_A release.
|
||||
UVM_SPIN_WHILE(UVM_READ_ONCE(*cpu_sema_A) != 1, &spin);
|
||||
UVM_SPIN_WHILE(READ_ONCE(*cpu_sema_A) != 1, &spin);
|
||||
|
||||
// Sleep for 10ms, the GPU waits while sema_B is held by us.
|
||||
msleep(10);
|
||||
|
||||
check_sema_C = UVM_READ_ONCE(*cpu_sema_C) == 0;
|
||||
check_sema_C = READ_ONCE(*cpu_sema_C) == 0;
|
||||
|
||||
// memory fence/barrier, check comment in
|
||||
// uvm_gpu_semaphore.c:uvm_gpu_semaphore_set_payload() for details.
|
||||
mb();
|
||||
|
||||
// Release sema_B.
|
||||
UVM_WRITE_ONCE(*cpu_sema_B, 1);
|
||||
WRITE_ONCE(*cpu_sema_B, 1);
|
||||
|
||||
// Wait for the GPU to release sema_C, i.e., the end of the push.
|
||||
status = uvm_push_wait(&push);
|
||||
@ -207,7 +207,7 @@ static NV_STATUS test_semaphore_acquire(uvm_gpu_t *gpu)
|
||||
// check_sema_C is validated here to ensure the push has ended and was not
|
||||
// interrupted in the middle, had the check failed.
|
||||
TEST_CHECK_GOTO(check_sema_C, done);
|
||||
TEST_CHECK_GOTO(UVM_READ_ONCE(*cpu_sema_C) == 1, done);
|
||||
TEST_CHECK_GOTO(READ_ONCE(*cpu_sema_C) == 1, done);
|
||||
|
||||
done:
|
||||
test_semaphore_free_sem(gpu, &mem);
|
||||
|
@ -847,7 +847,6 @@ typedef struct
|
||||
NvProcessorUuid processor; // IN
|
||||
NvU32 allProcessors; // IN
|
||||
NvU32 uvmFd; // IN
|
||||
NvU32 version; // IN (UvmToolsEventQueueVersion)
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
|
||||
|
||||
@ -934,7 +933,6 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
|
||||
NvU32 version; // IN (UvmToolsEventQueueVersion)
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
|
||||
|
||||
@ -1097,6 +1095,36 @@ typedef struct
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_MM_INITIALIZE_PARAMS;
|
||||
|
||||
#define UVM_TOOLS_INIT_EVENT_TRACKER_V2 UVM_IOCTL_BASE(76)
|
||||
typedef UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS UVM_TOOLS_INIT_EVENT_TRACKER_V2_PARAMS;
|
||||
|
||||
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_V2 UVM_IOCTL_BASE(77)
|
||||
typedef UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_V2_PARAMS;
|
||||
|
||||
//
|
||||
// UvmAllocDeviceP2P
|
||||
//
|
||||
#define UVM_ALLOC_DEVICE_P2P UVM_IOCTL_BASE(78)
|
||||
typedef struct
|
||||
{
|
||||
NvU64 base NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 length NV_ALIGN_BYTES(8); // IN
|
||||
NvU64 offset NV_ALIGN_BYTES(8); // IN
|
||||
NvProcessorUuid gpuUuid; // IN
|
||||
NvS32 rmCtrlFd; // IN
|
||||
NvU32 hClient; // IN
|
||||
NvU32 hMemory; // IN
|
||||
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_ALLOC_DEVICE_P2P_PARAMS;
|
||||
|
||||
#define UVM_CLEAR_ALL_ACCESS_COUNTERS UVM_IOCTL_BASE(79)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NV_STATUS rmStatus; // OUT
|
||||
} UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS;
|
||||
|
||||
//
|
||||
// Temporary ioctls which should be removed before UVM 8 release
|
||||
// Number backwards from 2047 - highest custom ioctl function number
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include <linux/jhash.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#if defined(NV_ASM_BARRIER_H_PRESENT)
|
||||
#include <asm/barrier.h>
|
||||
@ -147,21 +148,8 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#endif
|
||||
|
||||
// See bug 1707453 for further details about setting the minimum kernel version.
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
|
||||
# error This driver does not support kernels older than 3.10!
|
||||
#endif
|
||||
|
||||
#if !defined(VM_RESERVED)
|
||||
#define VM_RESERVED 0x00000000
|
||||
#endif
|
||||
#if !defined(VM_DONTEXPAND)
|
||||
#define VM_DONTEXPAND 0x00000000
|
||||
#endif
|
||||
#if !defined(VM_DONTDUMP)
|
||||
#define VM_DONTDUMP 0x00000000
|
||||
#endif
|
||||
#if !defined(VM_MIXEDMAP)
|
||||
#define VM_MIXEDMAP 0x00000000
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
|
||||
# error This driver does not support kernels older than 4.4!
|
||||
#endif
|
||||
|
||||
//
|
||||
@ -185,94 +173,8 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
printk(fmt, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
// printk_ratelimited was added in 2.6.33 via commit
|
||||
// 8a64f336bc1d4aa203b138d29d5a9c414a9fbb47. If not available, we prefer not
|
||||
// printing anything since it's supposed to be rate-limited.
|
||||
#if !defined(printk_ratelimited)
|
||||
#define printk_ratelimited UVM_NO_PRINT
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
// Just too much compilation trouble with the rate-limiting printk feature
|
||||
// until about k3.8. Because the non-rate-limited printing will cause
|
||||
// surprises and problems, just turn it off entirely in this situation.
|
||||
//
|
||||
#undef pr_debug_ratelimited
|
||||
#define pr_debug_ratelimited UVM_NO_PRINT
|
||||
#endif
|
||||
|
||||
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
|
||||
#if !defined(pmd_large)
|
||||
#define pmd_large(_pmd) \
|
||||
((pmd_val(_pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
|
||||
#endif
|
||||
#endif /* defined(NVCPU_X86) || defined(NVCPU_X86_64) */
|
||||
|
||||
#if !defined(GFP_DMA32)
|
||||
/*
|
||||
* GFP_DMA32 is similar to GFP_DMA, but instructs the Linux zone
|
||||
* allocator to allocate memory from the first 4GB on platforms
|
||||
* such as Linux/x86-64; the alternative is to use an IOMMU such
|
||||
* as the one implemented with the K8 GART, if available.
|
||||
*/
|
||||
#define GFP_DMA32 0
|
||||
#endif
|
||||
|
||||
#if !defined(__GFP_NOWARN)
|
||||
#define __GFP_NOWARN 0
|
||||
#endif
|
||||
|
||||
#if !defined(__GFP_NORETRY)
|
||||
#define __GFP_NORETRY 0
|
||||
#endif
|
||||
|
||||
#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
|
||||
|
||||
// Develop builds define DEBUG but enable optimization
|
||||
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
|
||||
// Wrappers for functions not building correctly without optimizations on,
|
||||
// implemented in uvm_debug_optimized.c. Notably the file is only built for
|
||||
// debug builds, not develop or release builds.
|
||||
|
||||
// Unoptimized builds of atomic_xchg() hit a BUILD_BUG() on arm64 as it relies
|
||||
// on __xchg being completely inlined:
|
||||
// /usr/src/linux-3.12.19/arch/arm64/include/asm/cmpxchg.h:67:3: note: in expansion of macro 'BUILD_BUG'
|
||||
//
|
||||
// Powerppc hits a similar issue, but ends up with an undefined symbol:
|
||||
// WARNING: "__xchg_called_with_bad_pointer" [...] undefined!
|
||||
int nv_atomic_xchg(atomic_t *val, int new);
|
||||
|
||||
// Same problem as atomic_xchg() on powerppc:
|
||||
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
|
||||
int nv_atomic_cmpxchg(atomic_t *val, int old, int new);
|
||||
|
||||
// Same problem as atomic_xchg() on powerppc:
|
||||
// WARNING: "__cmpxchg_called_with_bad_pointer" [...] undefined!
|
||||
long nv_atomic_long_cmpxchg(atomic_long_t *val, long old, long new);
|
||||
|
||||
// This Linux kernel commit:
|
||||
// 2016-08-30 0d025d271e55f3de21f0aaaf54b42d20404d2b23
|
||||
// leads to build failures on x86_64, when compiling without optimization. Avoid
|
||||
// that problem, by providing our own builds of copy_from_user / copy_to_user,
|
||||
// for debug (non-optimized) UVM builds. Those are accessed via these
|
||||
// nv_copy_to/from_user wrapper functions.
|
||||
//
|
||||
// Bug 1849583 has further details.
|
||||
unsigned long nv_copy_from_user(void *to, const void __user *from, unsigned long n);
|
||||
unsigned long nv_copy_to_user(void __user *to, const void *from, unsigned long n);
|
||||
|
||||
#else
|
||||
#define nv_atomic_xchg atomic_xchg
|
||||
#define nv_atomic_cmpxchg atomic_cmpxchg
|
||||
#define nv_atomic_long_cmpxchg atomic_long_cmpxchg
|
||||
#define nv_copy_to_user copy_to_user
|
||||
#define nv_copy_from_user copy_from_user
|
||||
#endif
|
||||
|
||||
#ifndef NV_ALIGN_DOWN
|
||||
#define NV_ALIGN_DOWN(v,g) ((v) & ~((g) - 1))
|
||||
#endif
|
||||
|
||||
#if defined(NVCPU_X86)
|
||||
/* Some old IA32 kernels don't have 64/64 division routines,
|
||||
* they only support 64/32 division with do_div(). */
|
||||
@ -295,7 +197,6 @@ static inline uint64_t NV_DIV64(uint64_t dividend, uint64_t divisor, uint64_t *r
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CLOCK_MONOTONIC_RAW)
|
||||
/* Return a nanosecond-precise value */
|
||||
static inline NvU64 NV_GETTIME(void)
|
||||
{
|
||||
@ -304,60 +205,6 @@ static inline NvU64 NV_GETTIME(void)
|
||||
ktime_get_raw_ts64(&tm);
|
||||
return (NvU64) timespec64_to_ns(&tm);
|
||||
}
|
||||
#else
|
||||
/* We can only return a microsecond-precise value with the
|
||||
* available non-GPL symbols. */
|
||||
static inline NvU64 NV_GETTIME(void)
|
||||
{
|
||||
struct timespec64 tm;
|
||||
|
||||
ktime_get_real_ts64(&tm);
|
||||
return (NvU64) timespec64_to_ns(&tm);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(ilog2)
|
||||
static inline int NV_ILOG2_U32(u32 n)
|
||||
{
|
||||
return fls(n) - 1;
|
||||
}
|
||||
static inline int NV_ILOG2_U64(u64 n)
|
||||
{
|
||||
return fls64(n) - 1;
|
||||
}
|
||||
#define ilog2(n) (sizeof(n) <= 4 ? NV_ILOG2_U32(n) : NV_ILOG2_U64(n))
|
||||
#endif
|
||||
|
||||
// for_each_bit added in 2.6.24 via commit 3e037454bcfa4b187e8293d2121bd8c0f5a5c31c
|
||||
// later renamed in 2.6.34 via commit 984b3f5746ed2cde3d184651dabf26980f2b66e5
|
||||
#if !defined(for_each_set_bit)
|
||||
#define for_each_set_bit(bit, addr, size) for_each_bit((bit), (addr), (size))
|
||||
#endif
|
||||
|
||||
// for_each_set_bit_cont was added in 3.2 via 1e2ad28f80b4e155678259238f51edebc19e4014
|
||||
// It was renamed to for_each_set_bit_from in 3.3 via 307b1cd7ecd7f3dc5ce3d3860957f034f0abe4df
|
||||
#if !defined(for_each_set_bit_from)
|
||||
#define for_each_set_bit_from(bit, addr, size) \
|
||||
for ((bit) = find_next_bit((addr), (size), (bit)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_bit((addr), (size), (bit) + 1))
|
||||
#endif
|
||||
|
||||
// for_each_clear_bit and for_each_clear_bit_from were added in 3.10 via
|
||||
// 03f4a8226c2f9c14361f75848d1e93139bab90c4
|
||||
#if !defined(for_each_clear_bit)
|
||||
#define for_each_clear_bit(bit, addr, size) \
|
||||
for ((bit) = find_first_zero_bit((addr), (size)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
|
||||
#endif
|
||||
|
||||
#if !defined(for_each_clear_bit_from)
|
||||
#define for_each_clear_bit_from(bit, addr, size) \
|
||||
for ((bit) = find_next_zero_bit((addr), (size), (bit)); \
|
||||
(bit) < (size); \
|
||||
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
|
||||
#endif
|
||||
|
||||
#if !defined(NV_FIND_NEXT_BIT_WRAP_PRESENT)
|
||||
static inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset)
|
||||
@ -400,71 +247,6 @@ static inline unsigned long __for_each_wrap(const unsigned long *bitmap,
|
||||
(bit) = __for_each_wrap((addr), (size), (start), (bit) + 1))
|
||||
#endif
|
||||
|
||||
// Added in 2.6.24
|
||||
#ifndef ACCESS_ONCE
|
||||
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||
#endif
|
||||
|
||||
// WRITE_ONCE/READ_ONCE have incompatible definitions across versions, which produces warnings.
|
||||
// Therefore, we define our own macros
|
||||
#define UVM_WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
|
||||
#define UVM_READ_ONCE(x) ACCESS_ONCE(x)
|
||||
|
||||
// smp_mb__before_atomic was added in 3.16, provide a fallback
|
||||
#ifndef smp_mb__before_atomic
|
||||
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
|
||||
// That's what the kernel does for x86
|
||||
#define smp_mb__before_atomic() barrier()
|
||||
#else
|
||||
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
|
||||
#define smp_mb__before_atomic() smp_mb()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// smp_mb__after_atomic was added in 3.16, provide a fallback
|
||||
#ifndef smp_mb__after_atomic
|
||||
#if NVCPU_IS_X86 || NVCPU_IS_X86_64
|
||||
// That's what the kernel does for x86
|
||||
#define smp_mb__after_atomic() barrier()
|
||||
#else
|
||||
// That's what the kernel does for at least arm32, arm64 and powerpc as of 4.3
|
||||
#define smp_mb__after_atomic() smp_mb()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// smp_load_acquire and smp_store_release were added in commit
|
||||
// 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 ("arch: Introduce
|
||||
// smp_load_acquire(), smp_store_release()") in v3.14 (2013-11-06).
|
||||
#ifndef smp_load_acquire
|
||||
#define smp_load_acquire(p) \
|
||||
({ \
|
||||
typeof(*(p)) __v = UVM_READ_ONCE(*(p)); \
|
||||
smp_mb(); \
|
||||
__v; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifndef smp_store_release
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
smp_mb(); \
|
||||
UVM_WRITE_ONCE(*(p), v); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
// atomic_read_acquire and atomic_set_release were added in commit
|
||||
// 654672d4ba1a6001c365833be895f9477c4d5eab ("locking/atomics:
|
||||
// Add _{acquire|release|relaxed}() variants of some atomic operations") in v4.3
|
||||
// (2015-08-06).
|
||||
// TODO: Bug 3849079: We always use this definition on newer kernels.
|
||||
#ifndef atomic_read_acquire
|
||||
#define atomic_read_acquire(p) smp_load_acquire(&(p)->counter)
|
||||
#endif
|
||||
|
||||
#ifndef atomic_set_release
|
||||
#define atomic_set_release(p, v) smp_store_release(&(p)->counter, v)
|
||||
#endif
|
||||
|
||||
// atomic_long_read_acquire and atomic_long_set_release were added in commit
|
||||
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
|
||||
// generated atomic-long") in v5.1 (2019-05-05).
|
||||
@ -484,29 +266,6 @@ static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
|
||||
atomic_long_set(p, v);
|
||||
}
|
||||
|
||||
// Added in 3.11
|
||||
#ifndef PAGE_ALIGNED
|
||||
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
|
||||
#endif
|
||||
|
||||
// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
|
||||
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
|
||||
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
|
||||
wait_on_bit_lock(word, bit, mode)
|
||||
#elif (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 4)
|
||||
static __sched int uvm_bit_wait(void *word)
|
||||
{
|
||||
if (signal_pending_state(current->state, current))
|
||||
return 1;
|
||||
schedule();
|
||||
return 0;
|
||||
}
|
||||
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
|
||||
wait_on_bit_lock(word, bit, uvm_bit_wait, mode)
|
||||
#else
|
||||
#error "Unknown number of arguments"
|
||||
#endif
|
||||
|
||||
static void uvm_init_radix_tree_preloadable(struct radix_tree_root *tree)
|
||||
{
|
||||
// GFP_NOWAIT, or some combination of flags that avoids setting
|
||||
@ -596,6 +355,8 @@ typedef struct
|
||||
#include <asm/pgtable_types.h>
|
||||
#endif
|
||||
|
||||
// Added in 57bd1905b228f (acpi, x86/mm: Remove encryption mask from ACPI page
|
||||
// protection type), v4.13
|
||||
#if !defined(PAGE_KERNEL_NOENC)
|
||||
#define PAGE_KERNEL_NOENC PAGE_KERNEL
|
||||
#endif
|
||||
@ -621,15 +382,4 @@ static inline pgprot_t uvm_pgprot_decrypted(pgprot_t prot)
|
||||
return prot;
|
||||
}
|
||||
|
||||
// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
|
||||
//
|
||||
// Archs with CONFIG_MMU should have their own page.h, and can't include
|
||||
// asm-generic/page.h. However, x86, powerpc, arm64 don't define page_to_virt()
|
||||
// macro in their version of page.h.
|
||||
#include <linux/mm.h>
|
||||
#ifndef page_to_virt
|
||||
#include <asm/page.h>
|
||||
#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
|
||||
#endif
|
||||
|
||||
#endif // _UVM_LINUX_H
|
||||
|
@ -27,7 +27,7 @@
|
||||
|
||||
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 34);
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
|
||||
|
||||
switch (lock_order) {
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
|
||||
@ -48,7 +48,9 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_KEY_ROTATION_WLC);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_WLC_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CSL_SEC2_PUSH);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
|
||||
|
@ -322,6 +322,15 @@
|
||||
// Operations not allowed while holding this lock
|
||||
// - GPU memory allocation which can evict
|
||||
//
|
||||
// - Channel pool key rotation lock
|
||||
// Order: UVM_LOCK_ORDER_KEY_ROTATION
|
||||
// Condition: Confidential Computing is enabled
|
||||
// Mutex per channel pool
|
||||
//
|
||||
// The lock ensures mutual exclusion during key rotation affecting all the
|
||||
// channels in the associated pool. Key rotation in WLC pools is handled
|
||||
// using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
|
||||
//
|
||||
// - CE channel CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
@ -338,6 +347,15 @@
|
||||
// Operations allowed while holding this lock
|
||||
// - Pushing work to CE channels (except for WLC channels)
|
||||
//
|
||||
// - WLC channel pool key rotation lock
|
||||
// Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
|
||||
// Condition: Confidential Computing is enabled
|
||||
// Mutex of WLC channel pool
|
||||
//
|
||||
// The lock has the same purpose as the regular channel pool key rotation
|
||||
// lock. Using a different order lock for WLC channels allows key rotation
|
||||
// on those channels during indirect work submission.
|
||||
//
|
||||
// - WLC CSL channel pool semaphore
|
||||
// Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
|
||||
// Condition: The Confidential Computing feature is enabled
|
||||
@ -484,7 +502,9 @@ typedef enum
|
||||
UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
|
||||
UVM_LOCK_ORDER_CHUNK_MAPPING,
|
||||
UVM_LOCK_ORDER_PAGE_TREE,
|
||||
UVM_LOCK_ORDER_KEY_ROTATION,
|
||||
UVM_LOCK_ORDER_CSL_PUSH,
|
||||
UVM_LOCK_ORDER_KEY_ROTATION_WLC,
|
||||
UVM_LOCK_ORDER_CSL_WLC_PUSH,
|
||||
UVM_LOCK_ORDER_CSL_SEC2_PUSH,
|
||||
UVM_LOCK_ORDER_PUSH,
|
||||
@ -1208,7 +1228,7 @@ static void __uvm_bit_lock(uvm_bit_locks_t *bit_locks, unsigned long bit)
|
||||
{
|
||||
int res;
|
||||
|
||||
res = UVM_WAIT_ON_BIT_LOCK(bit_locks->bits, bit, TASK_UNINTERRUPTIBLE);
|
||||
res = wait_on_bit_lock(bit_locks->bits, bit, TASK_UNINTERRUPTIBLE);
|
||||
UVM_ASSERT_MSG(res == 0, "Uninterruptible task interrupted: %d\n", res);
|
||||
uvm_assert_bit_locked(bit_locks, bit);
|
||||
}
|
||||
|
@ -171,8 +171,11 @@ static NV_STATUS uvm_pte_buffer_get(uvm_pte_buffer_t *pte_buffer,
|
||||
pte_buffer->mapping_info.pteBufferSize = pte_buffer->num_ptes * pte_buffer->pte_size;
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_CHANNEL) {
|
||||
uvm_va_range_channel_t *channel_range;
|
||||
|
||||
channel_range = uvm_va_range_to_channel(va_range);
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceGetChannelResourcePtes(gpu_va_space->duped_gpu_va_space,
|
||||
va_range->channel.rm_descriptor,
|
||||
channel_range->rm_descriptor,
|
||||
map_offset,
|
||||
pte_buffer->num_ptes * pte_buffer->page_size,
|
||||
&pte_buffer->mapping_info));
|
||||
@ -345,8 +348,8 @@ static NV_STATUS map_rm_pt_range(uvm_page_tree_t *tree,
|
||||
static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map)
|
||||
{
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_CHANNEL) {
|
||||
return uvm_hal_downgrade_membar_type(va_range->channel.gpu_va_space->gpu,
|
||||
va_range->channel.aperture == UVM_APERTURE_VID);
|
||||
return uvm_hal_downgrade_membar_type(uvm_va_range_to_channel(va_range)->gpu_va_space->gpu,
|
||||
uvm_va_range_to_channel(va_range)->aperture == UVM_APERTURE_VID);
|
||||
}
|
||||
|
||||
// If there is no mem_handle, this is a sparse mapping.
|
||||
@ -412,7 +415,7 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
|
||||
}
|
||||
else {
|
||||
node = &va_range->node;
|
||||
pt_range_vec = &va_range->channel.pt_range_vec;
|
||||
pt_range_vec = &uvm_va_range_to_channel(va_range)->pt_range_vec;
|
||||
}
|
||||
|
||||
if (map_offset + uvm_range_tree_node_size(node) > mem_info->size)
|
||||
@ -593,7 +596,7 @@ static void uvm_release_rm_handle(struct nv_kref *ref)
|
||||
|
||||
static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_EXTERNAL_RANGE_PARAMS *params)
|
||||
{
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
uvm_va_range_external_t *external_range = NULL;
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -611,7 +614,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
|
||||
// Create the new external VA range.
|
||||
// uvm_va_range_create_external handles any collisions when it attempts to
|
||||
// insert the new range into the va_space range tree.
|
||||
status = uvm_va_range_create_external(va_space, mm, params->base, params->length, &va_range);
|
||||
status = uvm_va_range_create_external(va_space, mm, params->base, params->length, &external_range);
|
||||
if (status != NV_OK) {
|
||||
UVM_DBG_PRINT_RL("Failed to create external VA range [0x%llx, 0x%llx)\n",
|
||||
params->base,
|
||||
@ -651,7 +654,7 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
}
|
||||
// This is a local or peer allocation, so the owning GPU must have been
|
||||
// registered. This also checks for if EGM owning GPU is registered.
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
owning_gpu = uvm_va_space_get_gpu_by_mem_info(va_space, mem_info);
|
||||
if (!owning_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
@ -678,18 +681,19 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_ext_gpu_map_t *uvm_va_range_ext_gpu_map(uvm_va_range_t *va_range, uvm_gpu_t *mapping_gpu, NvU64 addr)
|
||||
static uvm_ext_gpu_map_t *uvm_va_range_ext_gpu_map(uvm_va_range_external_t *external_range,
|
||||
uvm_gpu_t *mapping_gpu,
|
||||
NvU64 addr)
|
||||
{
|
||||
uvm_ext_gpu_map_t *ext_gpu_map = NULL;
|
||||
uvm_range_tree_node_t *node;
|
||||
uvm_ext_gpu_range_tree_t *range_tree;
|
||||
|
||||
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL);
|
||||
uvm_assert_rwsem_locked(&va_range->va_space->lock);
|
||||
uvm_assert_rwsem_locked(&external_range->va_range.va_space->lock);
|
||||
|
||||
range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
range_tree = uvm_ext_gpu_range_tree(external_range, mapping_gpu);
|
||||
|
||||
if (uvm_processor_mask_test(&va_range->external.mapped_gpus, mapping_gpu->id)) {
|
||||
if (uvm_processor_mask_test(&external_range->mapped_gpus, mapping_gpu->id)) {
|
||||
UVM_ASSERT(!uvm_range_tree_empty(&range_tree->tree));
|
||||
node = uvm_range_tree_find(&range_tree->tree, addr);
|
||||
if (node) {
|
||||
@ -759,13 +763,13 @@ static NV_STATUS uvm_ext_gpu_map_split(uvm_range_tree_t *tree,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_unmap_external_in_range(uvm_va_range_t *va_range,
|
||||
static NV_STATUS uvm_unmap_external_in_range(uvm_va_range_external_t *external_range,
|
||||
uvm_gpu_t *gpu,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
struct list_head *deferred_list)
|
||||
{
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(external_range, gpu);
|
||||
uvm_ext_gpu_map_t *ext_map, *ext_map_next = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -813,7 +817,7 @@ static NV_STATUS uvm_unmap_external_in_range(uvm_va_range_t *va_range,
|
||||
// 2. It needs to visit newly created uvm_ext_gpu_map_t, as a result of
|
||||
// splits. This means it can't use safe iterators as they will skip the
|
||||
// newly created uvm_ext_gpu_map_t.
|
||||
ext_map = uvm_ext_gpu_map_iter_first(va_range, gpu, start, end);
|
||||
ext_map = uvm_ext_gpu_map_iter_first(external_range, gpu, start, end);
|
||||
while (ext_map) {
|
||||
if (start > ext_map->node.start) {
|
||||
status = uvm_ext_gpu_map_split(&range_tree->tree, ext_map, start - 1, &ext_map_next);
|
||||
@ -828,10 +832,10 @@ static NV_STATUS uvm_unmap_external_in_range(uvm_va_range_t *va_range,
|
||||
ext_map_next = NULL;
|
||||
}
|
||||
else {
|
||||
ext_map_next = uvm_ext_gpu_map_iter_next(va_range, ext_map, end);
|
||||
ext_map_next = uvm_ext_gpu_map_iter_next(external_range, ext_map, end);
|
||||
}
|
||||
|
||||
uvm_ext_gpu_map_destroy(va_range, ext_map, deferred_list);
|
||||
uvm_ext_gpu_map_destroy(external_range, ext_map, deferred_list);
|
||||
}
|
||||
|
||||
ext_map = ext_map_next;
|
||||
@ -840,7 +844,7 @@ static NV_STATUS uvm_unmap_external_in_range(uvm_va_range_t *va_range,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_external_t *external_range,
|
||||
uvm_gpu_t *mapping_gpu,
|
||||
const uvm_rm_user_object_t *user_rm_mem,
|
||||
const uvm_map_rm_params_t *map_rm_params,
|
||||
@ -848,9 +852,9 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
NvU64 length,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_va_space_t *va_space = va_range->va_space;
|
||||
uvm_va_space_t *va_space = external_range->va_range.va_space;
|
||||
uvm_ext_gpu_map_t *ext_gpu_map = NULL;
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(external_range, mapping_gpu);
|
||||
UvmGpuMemoryInfo mem_info;
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
|
||||
NvU64 mapping_page_size;
|
||||
@ -870,7 +874,7 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
|
||||
uvm_mutex_lock(&range_tree->lock);
|
||||
|
||||
status = uvm_unmap_external_in_range(va_range, mapping_gpu, base, base + length - 1, NULL);
|
||||
status = uvm_unmap_external_in_range(external_range, mapping_gpu, base, base + length - 1, NULL);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -880,8 +884,8 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
goto error;
|
||||
}
|
||||
|
||||
// Insert the ext_gpu_map into the VA range immediately since some of the
|
||||
// below calls require it to be there.
|
||||
// Insert the ext_gpu_map into the external range immediately since some of
|
||||
// the below calls require it to be there.
|
||||
ext_gpu_map->node.start = base;
|
||||
ext_gpu_map->node.end = base + length - 1;
|
||||
RB_CLEAR_NODE(&ext_gpu_map->node.rb_node);
|
||||
@ -897,14 +901,14 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
status = uvm_range_tree_add(&range_tree->tree, &ext_gpu_map->node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_processor_mask_set_atomic(&va_range->external.mapped_gpus, mapping_gpu->id);
|
||||
uvm_processor_mask_set_atomic(&external_range->mapped_gpus, mapping_gpu->id);
|
||||
ext_gpu_map->gpu = mapping_gpu;
|
||||
ext_gpu_map->mem_handle->gpu = mapping_gpu;
|
||||
nv_kref_init(&ext_gpu_map->mem_handle->ref_count);
|
||||
|
||||
// Error paths after this point may call uvm_va_range_ext_gpu_map, so do a
|
||||
// sanity check now to make sure it doesn't trigger any asserts.
|
||||
UVM_ASSERT(uvm_va_range_ext_gpu_map(va_range, mapping_gpu, base) == ext_gpu_map);
|
||||
UVM_ASSERT(uvm_va_range_ext_gpu_map(external_range, mapping_gpu, base) == ext_gpu_map);
|
||||
|
||||
// Dup the memory. This verifies the input handles, takes a ref count on the
|
||||
// physical allocation so it can't go away under us, and returns us the
|
||||
@ -953,7 +957,12 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
|
||||
mem_info.pageSize = mapping_page_size;
|
||||
|
||||
status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
|
||||
status = uvm_va_range_map_rm_allocation(&external_range->va_range,
|
||||
mapping_gpu,
|
||||
&mem_info,
|
||||
map_rm_params,
|
||||
ext_gpu_map,
|
||||
out_tracker);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -961,7 +970,7 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
uvm_ext_gpu_map_destroy(va_range, ext_gpu_map, NULL);
|
||||
uvm_ext_gpu_map_destroy(external_range, ext_gpu_map, NULL);
|
||||
uvm_mutex_unlock(&range_tree->lock);
|
||||
return status;
|
||||
}
|
||||
@ -969,7 +978,7 @@ error:
|
||||
// Actual implementation of UvmMapExternalAllocation
|
||||
static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_EXTERNAL_ALLOCATION_PARAMS *params)
|
||||
{
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
uvm_va_range_external_t *external_range = NULL;
|
||||
uvm_gpu_t *mapping_gpu;
|
||||
uvm_processor_mask_t *mapped_gpus;
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -994,11 +1003,10 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
va_range = uvm_va_range_find(va_space, params->base);
|
||||
external_range = uvm_va_range_external_find(va_space, params->base);
|
||||
|
||||
if (!va_range ||
|
||||
va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL ||
|
||||
va_range->node.end < params->base + params->length - 1) {
|
||||
if (!external_range ||
|
||||
external_range->va_range.node.end < params->base + params->length - 1) {
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_processor_mask_cache_free(mapped_gpus);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
@ -1030,7 +1038,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
|
||||
map_rm_params.format_type = params->perGpuAttributes[i].gpuFormatType;
|
||||
map_rm_params.element_bits = params->perGpuAttributes[i].gpuElementBits;
|
||||
map_rm_params.compression_type = params->perGpuAttributes[i].gpuCompressionType;
|
||||
status = uvm_map_external_allocation_on_gpu(va_range,
|
||||
status = uvm_map_external_allocation_on_gpu(external_range,
|
||||
mapping_gpu,
|
||||
&user_rm_mem,
|
||||
&map_rm_params,
|
||||
@ -1060,17 +1068,17 @@ error:
|
||||
|
||||
// Tear down only those mappings we created during this call
|
||||
for_each_va_space_gpu_in_mask(mapping_gpu, va_space, mapped_gpus) {
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(external_range, mapping_gpu);
|
||||
uvm_ext_gpu_map_t *ext_map, *ext_map_next;
|
||||
|
||||
uvm_mutex_lock(&range_tree->lock);
|
||||
uvm_ext_gpu_map_for_each_in_safe(ext_map,
|
||||
ext_map_next,
|
||||
va_range,
|
||||
external_range,
|
||||
mapping_gpu,
|
||||
params->base,
|
||||
params->base + params->length - 1)
|
||||
uvm_ext_gpu_map_destroy(va_range, ext_map, NULL);
|
||||
uvm_ext_gpu_map_destroy(external_range, ext_map, NULL);
|
||||
uvm_mutex_unlock(&range_tree->lock);
|
||||
}
|
||||
|
||||
@ -1091,15 +1099,15 @@ static NvU64 external_sparse_pte_maker(uvm_page_table_range_vec_t *range_vec, Nv
|
||||
return range_vec->tree->hal->make_sparse_pte();
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_map_external_sparse_on_gpu(uvm_va_range_t *va_range,
|
||||
static NV_STATUS uvm_map_external_sparse_on_gpu(uvm_va_range_external_t *external_range,
|
||||
uvm_gpu_t *mapping_gpu,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
struct list_head *deferred_free_list)
|
||||
{
|
||||
uvm_va_space_t *va_space = va_range->va_space;
|
||||
uvm_va_space_t *va_space = external_range->va_range.va_space;
|
||||
uvm_ext_gpu_map_t *ext_gpu_map = NULL;
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
|
||||
uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(external_range, mapping_gpu);
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
|
||||
uvm_page_tree_t *page_tree;
|
||||
NV_STATUS status;
|
||||
@ -1115,7 +1123,7 @@ static NV_STATUS uvm_map_external_sparse_on_gpu(uvm_va_range_t *va_range,
|
||||
|
||||
uvm_mutex_lock(&range_tree->lock);
|
||||
|
||||
status = uvm_unmap_external_in_range(va_range, mapping_gpu, base, base + length - 1, deferred_free_list);
|
||||
status = uvm_unmap_external_in_range(external_range, mapping_gpu, base, base + length - 1, deferred_free_list);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -1135,10 +1143,10 @@ static NV_STATUS uvm_map_external_sparse_on_gpu(uvm_va_range_t *va_range,
|
||||
status = uvm_range_tree_add(&range_tree->tree, &ext_gpu_map->node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_processor_mask_set_atomic(&va_range->external.mapped_gpus, mapping_gpu->id);
|
||||
uvm_processor_mask_set_atomic(&external_range->mapped_gpus, mapping_gpu->id);
|
||||
ext_gpu_map->gpu = mapping_gpu;
|
||||
|
||||
UVM_ASSERT(uvm_va_range_ext_gpu_map(va_range, mapping_gpu, base) == ext_gpu_map);
|
||||
UVM_ASSERT(uvm_va_range_ext_gpu_map(external_range, mapping_gpu, base) == ext_gpu_map);
|
||||
|
||||
status = uvm_page_table_range_vec_init(page_tree,
|
||||
ext_gpu_map->node.start,
|
||||
@ -1160,14 +1168,14 @@ static NV_STATUS uvm_map_external_sparse_on_gpu(uvm_va_range_t *va_range,
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
uvm_ext_gpu_map_destroy(va_range, ext_gpu_map, NULL);
|
||||
uvm_ext_gpu_map_destroy(external_range, ext_gpu_map, NULL);
|
||||
uvm_mutex_unlock(&range_tree->lock);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_map_external_sparse(uvm_va_space_t *va_space, UVM_MAP_EXTERNAL_SPARSE_PARAMS *params)
|
||||
{
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
uvm_va_range_external_t *external_range = NULL;
|
||||
uvm_gpu_t *mapping_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
LIST_HEAD(deferred_free_list);
|
||||
@ -1176,10 +1184,9 @@ static NV_STATUS uvm_map_external_sparse(uvm_va_space_t *va_space, UVM_MAP_EXTER
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
va_range = uvm_va_range_find(va_space, params->base);
|
||||
if (!va_range ||
|
||||
va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL ||
|
||||
va_range->node.end < params->base + params->length - 1) {
|
||||
external_range = uvm_va_range_external_find(va_space, params->base);
|
||||
if (!external_range ||
|
||||
external_range->va_range.node.end < params->base + params->length - 1) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
goto out;
|
||||
}
|
||||
@ -1196,7 +1203,11 @@ static NV_STATUS uvm_map_external_sparse(uvm_va_space_t *va_space, UVM_MAP_EXTER
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_map_external_sparse_on_gpu(va_range, mapping_gpu, params->base, params->length, &deferred_free_list);
|
||||
status = uvm_map_external_sparse_on_gpu(external_range,
|
||||
mapping_gpu,
|
||||
params->base,
|
||||
params->length,
|
||||
&deferred_free_list);
|
||||
|
||||
if (!list_empty(&deferred_free_list))
|
||||
uvm_gpu_retain(mapping_gpu);
|
||||
@ -1244,7 +1255,7 @@ void uvm_ext_gpu_map_free(uvm_ext_gpu_map_t *ext_gpu_map)
|
||||
uvm_gpu_release(owning_gpu);
|
||||
}
|
||||
|
||||
void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
|
||||
void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range,
|
||||
uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
struct list_head *deferred_free_list)
|
||||
{
|
||||
@ -1268,16 +1279,16 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
|
||||
|
||||
mapped_gpu = ext_gpu_map->gpu;
|
||||
|
||||
range_tree = uvm_ext_gpu_range_tree(va_range, mapped_gpu);
|
||||
range_tree = uvm_ext_gpu_range_tree(external_range, mapped_gpu);
|
||||
|
||||
uvm_assert_mutex_locked(&range_tree->lock);
|
||||
UVM_ASSERT(uvm_gpu_va_space_get(va_range->va_space, mapped_gpu));
|
||||
UVM_ASSERT(uvm_gpu_va_space_get(external_range->va_range.va_space, mapped_gpu));
|
||||
|
||||
uvm_range_tree_remove(&range_tree->tree, &ext_gpu_map->node);
|
||||
|
||||
// Unmap the PTEs
|
||||
if (ext_gpu_map->pt_range_vec.ranges) {
|
||||
membar = va_range_downgrade_membar(va_range, ext_gpu_map);
|
||||
membar = va_range_downgrade_membar(&external_range->va_range, ext_gpu_map);
|
||||
uvm_page_table_range_vec_clear_ptes(&ext_gpu_map->pt_range_vec, membar);
|
||||
uvm_page_table_range_vec_deinit(&ext_gpu_map->pt_range_vec);
|
||||
}
|
||||
@ -1299,7 +1310,7 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
|
||||
// Check if the sub-range tree is empty. Only then can the GPU be removed from
|
||||
// the mapped_gpus bitmap.
|
||||
if (uvm_range_tree_empty(&range_tree->tree))
|
||||
uvm_processor_mask_clear_atomic(&va_range->external.mapped_gpus, mapped_gpu->id);
|
||||
uvm_processor_mask_clear_atomic(&external_range->mapped_gpus, mapped_gpu->id);
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_unmap_external(uvm_va_space_t *va_space,
|
||||
@ -1307,7 +1318,7 @@ static NV_STATUS uvm_unmap_external(uvm_va_space_t *va_space,
|
||||
NvU64 length,
|
||||
const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_external_t *external_range;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_ext_gpu_range_tree_t *range_tree;
|
||||
@ -1318,8 +1329,8 @@ static NV_STATUS uvm_unmap_external(uvm_va_space_t *va_space,
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
va_range = uvm_va_range_find(va_space, base);
|
||||
if (!va_range || va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL || base + length - 1 > va_range->node.end) {
|
||||
external_range = uvm_va_range_external_find(va_space, base);
|
||||
if (!external_range || base + length - 1 > external_range->va_range.node.end) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
goto out;
|
||||
}
|
||||
@ -1330,9 +1341,9 @@ static NV_STATUS uvm_unmap_external(uvm_va_space_t *va_space,
|
||||
goto out;
|
||||
}
|
||||
|
||||
range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
|
||||
range_tree = uvm_ext_gpu_range_tree(external_range, gpu);
|
||||
uvm_mutex_lock(&range_tree->lock);
|
||||
status = uvm_unmap_external_in_range(va_range, gpu, base, base + length - 1, &deferred_free_list);
|
||||
status = uvm_unmap_external_in_range(external_range, gpu, base, base + length - 1, &deferred_free_list);
|
||||
uvm_mutex_unlock(&range_tree->lock);
|
||||
|
||||
// If the deferred_free_list is not empty, retain the GPU which maps the
|
||||
@ -1359,13 +1370,14 @@ NV_STATUS uvm_api_unmap_external(UVM_UNMAP_EXTERNAL_PARAMS *params, struct file
|
||||
}
|
||||
|
||||
// This destroys VA ranges created by UvmMapExternalAllocation,
|
||||
// UvmMapDynamicParallelismRegion, and UvmAllocSemaphorePool *only*. VA ranges
|
||||
// created by UvmMemMap and UvmAlloc go through mmap/munmap.
|
||||
// UvmMapDynamicParallelismRegion, UvmAllocDeviceP2P and UvmAllocSemaphorePool
|
||||
// *only*. VA ranges created by UvmMemMap and UvmAlloc go through mmap/munmap.
|
||||
static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_processor_mask_t *retained_mask = NULL;
|
||||
uvm_gpu_t *retained_gpu = NULL;
|
||||
LIST_HEAD(deferred_free_list);
|
||||
|
||||
if (uvm_api_range_invalid_4k(base, length))
|
||||
@ -1382,6 +1394,7 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
if (!va_range ||
|
||||
(va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL &&
|
||||
va_range->type != UVM_VA_RANGE_TYPE_SKED_REFLECTED &&
|
||||
va_range->type != UVM_VA_RANGE_TYPE_DEVICE_P2P &&
|
||||
va_range->type != UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) ||
|
||||
va_range->node.start != base ||
|
||||
va_range->node.end != base + length - 1) {
|
||||
@ -1390,7 +1403,7 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
}
|
||||
|
||||
if ((va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) &&
|
||||
uvm_mem_mapped_on_cpu_user(va_range->semaphore_pool.mem)) {
|
||||
uvm_mem_mapped_on_cpu_user(uvm_va_range_to_semaphore_pool(va_range)->mem)) {
|
||||
// Semaphore pools must be first unmapped from the CPU with munmap to
|
||||
// invalidate the vma.
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
@ -1398,25 +1411,37 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
|
||||
}
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) {
|
||||
retained_mask = va_range->external.retained_mask;
|
||||
uvm_va_range_external_t *external_range = uvm_va_range_to_external(va_range);
|
||||
|
||||
retained_mask = external_range->retained_mask;
|
||||
|
||||
// Set the retained_mask to NULL to prevent
|
||||
// uvm_va_range_destroy_external() from freeing the mask.
|
||||
va_range->external.retained_mask = NULL;
|
||||
external_range->retained_mask = NULL;
|
||||
|
||||
UVM_ASSERT(retained_mask);
|
||||
|
||||
// External ranges may have deferred free work, so the GPUs may have to
|
||||
// be retained. Construct the mask of all the GPUs that need to be
|
||||
// retained.
|
||||
uvm_processor_mask_and(retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
|
||||
uvm_processor_mask_and(retained_mask, &external_range->mapped_gpus, &va_space->registered_gpus);
|
||||
}
|
||||
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_DEVICE_P2P) {
|
||||
uvm_va_range_device_p2p_t *device_p2p_range = uvm_va_range_to_device_p2p(va_range);
|
||||
|
||||
retained_gpu = device_p2p_range->gpu;
|
||||
}
|
||||
|
||||
uvm_va_range_destroy(va_range, &deferred_free_list);
|
||||
|
||||
// If there is deferred work, retain the required GPUs.
|
||||
if (!list_empty(&deferred_free_list))
|
||||
uvm_global_gpu_retain(retained_mask);
|
||||
if (!list_empty(&deferred_free_list)) {
|
||||
if (retained_mask)
|
||||
uvm_global_gpu_retain(retained_mask);
|
||||
else
|
||||
uvm_gpu_retain(retained_gpu);
|
||||
}
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
@ -1424,7 +1449,10 @@ out:
|
||||
if (!list_empty(&deferred_free_list)) {
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
uvm_deferred_free_object_list(&deferred_free_list);
|
||||
uvm_global_gpu_release(retained_mask);
|
||||
if (retained_mask)
|
||||
uvm_global_gpu_release(retained_mask);
|
||||
else
|
||||
uvm_gpu_release(retained_gpu);
|
||||
}
|
||||
|
||||
// Free the mask allocated in uvm_va_range_create_external() since
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -40,33 +40,35 @@ typedef struct
|
||||
UvmGpuCompressionType compression_type;
|
||||
} uvm_map_rm_params_t;
|
||||
|
||||
static uvm_ext_gpu_range_tree_t *uvm_ext_gpu_range_tree(uvm_va_range_t *va_range, uvm_gpu_t *gpu)
|
||||
static uvm_ext_gpu_range_tree_t *uvm_ext_gpu_range_tree(uvm_va_range_external_t *external_range, uvm_gpu_t *gpu)
|
||||
{
|
||||
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL);
|
||||
|
||||
return &va_range->external.gpu_ranges[uvm_id_gpu_index(gpu->id)];
|
||||
return &external_range->gpu_ranges[uvm_id_gpu_index(gpu->id)];
|
||||
}
|
||||
|
||||
// Returns the first external map (if any) in the gpu's range tree.
|
||||
// va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
|
||||
// The caller must hold the range tree lock.
|
||||
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_first(uvm_va_range_t *va_range, uvm_gpu_t *gpu, NvU64 start, NvU64 end)
|
||||
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_first(uvm_va_range_external_t *external_range,
|
||||
uvm_gpu_t *gpu,
|
||||
NvU64 start,
|
||||
NvU64 end)
|
||||
{
|
||||
uvm_ext_gpu_range_tree_t *range_tree;
|
||||
uvm_range_tree_node_t *node;
|
||||
|
||||
UVM_ASSERT(start >= va_range->node.start);
|
||||
UVM_ASSERT(end <= va_range->node.end);
|
||||
UVM_ASSERT(start >= external_range->va_range.node.start);
|
||||
UVM_ASSERT(end <= external_range->va_range.node.end);
|
||||
|
||||
range_tree = uvm_ext_gpu_range_tree(va_range, gpu);
|
||||
range_tree = uvm_ext_gpu_range_tree(external_range, gpu);
|
||||
node = uvm_range_tree_iter_first(&range_tree->tree, start, end);
|
||||
return uvm_ext_gpu_map_container(node);
|
||||
}
|
||||
|
||||
// Returns the external map following the provided map (if any) in address order from
|
||||
// the gpu's range tree. va_range should be of type UVM_VA_RANGE_TYPE_EXTERNAL.
|
||||
// the gpu's range tree.
|
||||
// The caller must hold the range tree lock.
|
||||
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_next(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map, NvU64 end)
|
||||
static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_next(uvm_va_range_external_t *external_range,
|
||||
uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
NvU64 end)
|
||||
{
|
||||
uvm_ext_gpu_range_tree_t *range_tree;
|
||||
uvm_range_tree_node_t *node;
|
||||
@ -74,37 +76,41 @@ static uvm_ext_gpu_map_t *uvm_ext_gpu_map_iter_next(uvm_va_range_t *va_range, uv
|
||||
if (!ext_gpu_map)
|
||||
return NULL;
|
||||
|
||||
UVM_ASSERT(end <= va_range->node.end);
|
||||
UVM_ASSERT(end <= external_range->va_range.node.end);
|
||||
|
||||
range_tree = uvm_ext_gpu_range_tree(va_range, ext_gpu_map->gpu);
|
||||
range_tree = uvm_ext_gpu_range_tree(external_range, ext_gpu_map->gpu);
|
||||
node = uvm_range_tree_iter_next(&range_tree->tree, &ext_gpu_map->node, end);
|
||||
return uvm_ext_gpu_map_container(node);
|
||||
}
|
||||
|
||||
// The four iterators below require that the caller hold the gpu's range tree
|
||||
// lock.
|
||||
#define uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, start, end) \
|
||||
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)); \
|
||||
(ext_gpu_map); \
|
||||
(ext_gpu_map) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
|
||||
#define uvm_ext_gpu_map_for_each_in(ext_gpu_map, external_range, gpu, start, end) \
|
||||
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((external_range), (gpu), (start), (end)); \
|
||||
(ext_gpu_map); \
|
||||
(ext_gpu_map) = uvm_ext_gpu_map_iter_next((external_range), (ext_gpu_map), (end)))
|
||||
|
||||
#define uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu, start, end) \
|
||||
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((va_range), (gpu), (start), (end)), \
|
||||
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)); \
|
||||
(ext_gpu_map); \
|
||||
(ext_gpu_map) = (ext_gpu_map_next), \
|
||||
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((va_range), (ext_gpu_map), (end)))
|
||||
#define uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, ext_gpu_map_next, external_range, gpu, start, end) \
|
||||
for ((ext_gpu_map) = uvm_ext_gpu_map_iter_first((external_range), (gpu), (start), (end)), \
|
||||
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((external_range), (ext_gpu_map), (end)); \
|
||||
(ext_gpu_map); \
|
||||
(ext_gpu_map) = (ext_gpu_map_next), \
|
||||
(ext_gpu_map_next) = uvm_ext_gpu_map_iter_next((external_range), (ext_gpu_map), (end)))
|
||||
|
||||
#define uvm_ext_gpu_map_for_each(ext_gpu_map, va_range, gpu) \
|
||||
uvm_ext_gpu_map_for_each_in(ext_gpu_map, va_range, gpu, (va_range)->node.start, (va_range)->node.end)
|
||||
#define uvm_ext_gpu_map_for_each(ext_gpu_map, external_range, gpu) \
|
||||
uvm_ext_gpu_map_for_each_in(ext_gpu_map, \
|
||||
external_range, \
|
||||
gpu, \
|
||||
(external_range)->va_range.node.start, \
|
||||
(external_range)->va_range.node.end)
|
||||
|
||||
#define uvm_ext_gpu_map_for_each_safe(ext_gpu_map, ext_gpu_map_next, va_range, gpu) \
|
||||
uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, \
|
||||
ext_gpu_map_next, \
|
||||
va_range, \
|
||||
gpu, \
|
||||
(va_range)->node.start, \
|
||||
(va_range)->node.end)
|
||||
#define uvm_ext_gpu_map_for_each_safe(ext_gpu_map, ext_gpu_map_next, external_range, gpu) \
|
||||
uvm_ext_gpu_map_for_each_in_safe(ext_gpu_map, \
|
||||
ext_gpu_map_next, \
|
||||
external_range, \
|
||||
gpu, \
|
||||
(external_range)->va_range.node.start, \
|
||||
(external_range)->va_range.node.end)
|
||||
|
||||
// User-facing APIs (uvm_api_map_external_allocation, uvm_api_free) are declared
|
||||
// uvm_api.h.
|
||||
@ -141,7 +147,7 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
|
||||
//
|
||||
// The caller must hold the range tree lock for the mapping gpu and is
|
||||
// responsible for making sure that mapping gpu is retained across those calls.
|
||||
void uvm_ext_gpu_map_destroy(uvm_va_range_t *va_range,
|
||||
void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range,
|
||||
uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
struct list_head *deferred_free_list);
|
||||
|
||||
|
@ -1051,8 +1051,6 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
// (uvm_conf_computing_dma_buffer_pool_t). Because we would typically
|
||||
// already hold the DMA_BUFFER_POOL lock at this time, we cannot hold
|
||||
// the block lock. Allocate PTEs without eviction in this context.
|
||||
//
|
||||
// See uvm_pmm_gpu_alloc()
|
||||
if (uvm_mem_is_sysmem_dma(mem))
|
||||
pmm_flags = UVM_PMM_ALLOC_FLAGS_NONE;
|
||||
|
||||
|
@ -97,7 +97,8 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
}
|
||||
|
||||
uvm_push_set_description(&push,
|
||||
"Memcopy %zd bytes from virtual sys_mem 0x%llx to %s mem 0x%llx [mem loc: %s, page size: %u]",
|
||||
"Memcopy %zu bytes from virtual sys_mem 0x%llx to %s mem 0x%llx [mem loc: %s, page "
|
||||
"size: %llu]",
|
||||
size_this_time,
|
||||
sys_mem_gpu_address.address,
|
||||
mem_gpu_address.is_virtual ? "virtual" : "physical",
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -60,14 +60,14 @@ module_param(uvm_perf_migrate_cpu_preunmap_block_order, uint, S_IRUGO);
|
||||
static bool g_uvm_perf_migrate_cpu_preunmap_enable __read_mostly;
|
||||
static NvU64 g_uvm_perf_migrate_cpu_preunmap_size __read_mostly;
|
||||
|
||||
static bool is_migration_single_block(uvm_va_range_t *first_va_range, NvU64 base, NvU64 length)
|
||||
static bool is_migration_single_block(uvm_va_range_managed_t *first_managed_range, NvU64 base, NvU64 length)
|
||||
{
|
||||
NvU64 end = base + length - 1;
|
||||
|
||||
if (end > first_va_range->node.end)
|
||||
if (end > first_managed_range->va_range.node.end)
|
||||
return false;
|
||||
|
||||
return uvm_va_range_block_index(first_va_range, base) == uvm_va_range_block_index(first_va_range, end);
|
||||
return uvm_va_range_block_index(first_managed_range, base) == uvm_va_range_block_index(first_managed_range, end);
|
||||
}
|
||||
|
||||
static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
|
||||
@ -236,7 +236,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
|
||||
}
|
||||
else {
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
uvm_va_policy_t *policy = &va_block->managed_range->policy;
|
||||
|
||||
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
|
||||
status = uvm_va_block_make_resident_read_duplicate(va_block,
|
||||
@ -401,28 +401,27 @@ static bool va_block_should_do_cpu_preunmap(uvm_va_block_t *va_block,
|
||||
return num_cpu_unchanged_pages == 0;
|
||||
}
|
||||
|
||||
static void preunmap_multi_block(uvm_va_range_t *va_range,
|
||||
static void preunmap_multi_block(uvm_va_range_managed_t *managed_range,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
uvm_processor_id_t dest_id)
|
||||
{
|
||||
size_t i;
|
||||
const size_t first_block_index = uvm_va_range_block_index(va_range, start);
|
||||
const size_t last_block_index = uvm_va_range_block_index(va_range, end);
|
||||
const size_t first_block_index = uvm_va_range_block_index(managed_range, start);
|
||||
const size_t last_block_index = uvm_va_range_block_index(managed_range, end);
|
||||
NvU32 num_unmap_pages = 0;
|
||||
|
||||
UVM_ASSERT(start >= va_range->node.start);
|
||||
UVM_ASSERT(end <= va_range->node.end);
|
||||
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
uvm_assert_rwsem_locked(&va_range->va_space->lock);
|
||||
UVM_ASSERT(start >= managed_range->va_range.node.start);
|
||||
UVM_ASSERT(end <= managed_range->va_range.node.end);
|
||||
uvm_assert_rwsem_locked(&managed_range->va_range.va_space->lock);
|
||||
|
||||
UVM_ASSERT(uvm_range_group_all_migratable(va_range->va_space, start, end));
|
||||
UVM_ASSERT(uvm_range_group_all_migratable(managed_range->va_range.va_space, start, end));
|
||||
|
||||
for (i = first_block_index; i <= last_block_index; i++) {
|
||||
NvU32 num_block_unmap_pages;
|
||||
|
||||
if (!va_block_should_do_cpu_preunmap(uvm_va_range_block(va_range, i),
|
||||
if (!va_block_should_do_cpu_preunmap(uvm_va_range_block(managed_range, i),
|
||||
va_block_context,
|
||||
start,
|
||||
end,
|
||||
@ -435,10 +434,10 @@ static void preunmap_multi_block(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
if (num_unmap_pages > 0)
|
||||
unmap_mapping_range(va_range->va_space->mapping, start, end - start + 1, 1);
|
||||
unmap_mapping_range(managed_range->va_range.va_space->mapping, start, end - start + 1, 1);
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_managed_t *managed_range,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
@ -447,22 +446,21 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
size_t i;
|
||||
const size_t first_block_index = uvm_va_range_block_index(va_range, start);
|
||||
const size_t last_block_index = uvm_va_range_block_index(va_range, end);
|
||||
const size_t first_block_index = uvm_va_range_block_index(managed_range, start);
|
||||
const size_t last_block_index = uvm_va_range_block_index(managed_range, end);
|
||||
|
||||
UVM_ASSERT(start >= va_range->node.start);
|
||||
UVM_ASSERT(end <= va_range->node.end);
|
||||
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
uvm_assert_rwsem_locked(&va_range->va_space->lock);
|
||||
UVM_ASSERT(start >= managed_range->va_range.node.start);
|
||||
UVM_ASSERT(end <= managed_range->va_range.node.end);
|
||||
uvm_assert_rwsem_locked(&managed_range->va_range.va_space->lock);
|
||||
|
||||
UVM_ASSERT(uvm_range_group_all_migratable(va_range->va_space, start, end));
|
||||
UVM_ASSERT(uvm_range_group_all_migratable(managed_range->va_range.va_space, start, end));
|
||||
|
||||
// Iterate over blocks, populating them if necessary
|
||||
for (i = first_block_index; i <= last_block_index; i++) {
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_va_block_region_t region;
|
||||
uvm_va_block_t *va_block;
|
||||
NV_STATUS status = uvm_va_range_block_create(va_range, i, &va_block);
|
||||
NV_STATUS status = uvm_va_range_block_create(managed_range, i, &va_block);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -487,7 +485,7 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
static NV_STATUS uvm_va_range_migrate(uvm_va_range_managed_t *managed_range,
|
||||
uvm_service_block_context_t *service_context,
|
||||
NvU64 start,
|
||||
NvU64 end,
|
||||
@ -497,9 +495,10 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NvU64 preunmap_range_start = start;
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
|
||||
uvm_va_policy_t *policy = &managed_range->policy;
|
||||
|
||||
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(policy, va_range->va_space);
|
||||
should_do_cpu_preunmap = should_do_cpu_preunmap &&
|
||||
va_range_should_do_cpu_preunmap(policy, managed_range->va_range.va_space);
|
||||
|
||||
// Divide migrations into groups of contiguous VA blocks. This is to trigger
|
||||
// CPU unmaps for that region before the migration starts.
|
||||
@ -511,7 +510,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
preunmap_range_end = UVM_ALIGN_UP(preunmap_range_start + 1, g_uvm_perf_migrate_cpu_preunmap_size);
|
||||
preunmap_range_end = min(preunmap_range_end - 1, end);
|
||||
|
||||
preunmap_multi_block(va_range,
|
||||
preunmap_multi_block(managed_range,
|
||||
service_context->block_context,
|
||||
preunmap_range_start,
|
||||
preunmap_range_end,
|
||||
@ -521,7 +520,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
preunmap_range_end = end;
|
||||
}
|
||||
|
||||
status = uvm_va_range_migrate_multi_block(va_range,
|
||||
status = uvm_va_range_migrate_multi_block(managed_range,
|
||||
service_context,
|
||||
preunmap_range_start,
|
||||
preunmap_range_end,
|
||||
@ -539,7 +538,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
|
||||
static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
uvm_va_range_t *first_va_range,
|
||||
uvm_va_range_managed_t *first_managed_range,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
@ -547,39 +546,33 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
bool should_do_cpu_preunmap,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_va_range_t *va_range, *va_range_last;
|
||||
uvm_va_range_managed_t *managed_range, *managed_range_last;
|
||||
NvU64 end = base + length - 1;
|
||||
NV_STATUS status = NV_OK;
|
||||
bool skipped_migrate = false;
|
||||
|
||||
if (!first_va_range) {
|
||||
// For HMM, we iterate over va_blocks since there is no va_range.
|
||||
if (!first_managed_range) {
|
||||
// For HMM, we iterate over va_blocks since there is no managed_range.
|
||||
return uvm_hmm_migrate_ranges(va_space, service_context, base, length, dest_id, mode, out_tracker);
|
||||
}
|
||||
|
||||
UVM_ASSERT(first_va_range == uvm_va_space_iter_first(va_space, base, base));
|
||||
UVM_ASSERT(first_managed_range == uvm_va_space_iter_managed_first(va_space, base, base));
|
||||
|
||||
va_range_last = NULL;
|
||||
uvm_for_each_va_range_in_contig_from(va_range, va_space, first_va_range, end) {
|
||||
managed_range_last = NULL;
|
||||
uvm_for_each_va_range_managed_in_contig_from(managed_range, va_space, first_managed_range, end) {
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
|
||||
uvm_va_policy_t *policy = &managed_range->policy;
|
||||
|
||||
va_range_last = va_range;
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// Only managed ranges can be migrated
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
break;
|
||||
}
|
||||
|
||||
// For UVM-Lite GPUs, the CUDA driver may suballocate a single va_range
|
||||
// into many range groups. For this reason, we iterate over each va_range first
|
||||
// then through the range groups within.
|
||||
// For UVM-Lite GPUs, the CUDA driver may suballocate a single
|
||||
// managed_range into many range groups. For this reason, we iterate
|
||||
// over each managed_range first then through the range groups within.
|
||||
uvm_range_group_for_each_migratability_in(&iter,
|
||||
va_space,
|
||||
max(base, va_range->node.start),
|
||||
min(end, va_range->node.end)) {
|
||||
// Skip non-migratable VA ranges
|
||||
max(base, managed_range->va_range.node.start),
|
||||
min(end, managed_range->va_range.node.end)) {
|
||||
// Skip non-migratable ranges
|
||||
if (!iter.migratable) {
|
||||
// Only return NV_WARN_MORE_PROCESSING_REQUIRED if the pages aren't
|
||||
// already resident at dest_id.
|
||||
@ -588,7 +581,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
service_context->block_context->make_resident.dest_nid))
|
||||
skipped_migrate = true;
|
||||
}
|
||||
else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
|
||||
else if (uvm_processor_mask_test(&managed_range->va_range.uvm_lite_gpus, dest_id) &&
|
||||
!uvm_va_policy_preferred_location_equal(policy, dest_id, NUMA_NO_NODE)) {
|
||||
// Don't migrate to a non-faultable GPU that is in UVM-Lite mode,
|
||||
// unless it's the preferred location
|
||||
@ -596,7 +589,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
break;
|
||||
}
|
||||
else {
|
||||
status = uvm_va_range_migrate(va_range,
|
||||
status = uvm_va_range_migrate(managed_range,
|
||||
service_context,
|
||||
iter.start,
|
||||
iter.end,
|
||||
@ -614,7 +607,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
return status;
|
||||
|
||||
// Check that we were able to iterate over the entire range without any gaps
|
||||
if (!va_range_last || va_range_last->node.end < end)
|
||||
if (!managed_range_last || managed_range_last->va_range.node.end < end)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (skipped_migrate)
|
||||
@ -630,7 +623,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t dest_id,
|
||||
int dest_nid,
|
||||
NvU32 migrate_flags,
|
||||
uvm_va_range_t *first_va_range,
|
||||
uvm_va_range_managed_t *first_managed_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -644,12 +637,12 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
// If the GPU has its memory disabled, just skip the migration and let
|
||||
// faults take care of things.
|
||||
if (!uvm_va_space_processor_has_memory(va_space, dest_id))
|
||||
if (!uvm_processor_has_memory(dest_id))
|
||||
return NV_OK;
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
else if (!first_va_range)
|
||||
else if (!first_managed_range)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
service_context = uvm_service_block_context_alloc(mm);
|
||||
@ -677,8 +670,8 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
// 2- Go block by block reexecuting the transfer (in case someone moved it
|
||||
// since the first pass), and adding the mappings.
|
||||
//
|
||||
// For HMM (!first_va_range), we always do a single pass.
|
||||
is_single_block = !first_va_range || is_migration_single_block(first_va_range, base, length);
|
||||
// For HMM (!first_managed_range), we always do a single pass.
|
||||
is_single_block = !first_managed_range || is_migration_single_block(first_managed_range, base, length);
|
||||
do_mappings = UVM_ID_IS_GPU(dest_id) || !(migrate_flags & UVM_MIGRATE_FLAG_SKIP_CPU_MAP);
|
||||
do_two_passes = do_mappings && !is_single_block;
|
||||
|
||||
@ -687,7 +680,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
status = uvm_migrate_ranges(va_space,
|
||||
service_context,
|
||||
first_va_range,
|
||||
first_managed_range,
|
||||
base,
|
||||
length,
|
||||
dest_id,
|
||||
@ -705,7 +698,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
status = uvm_migrate_ranges(va_space,
|
||||
service_context,
|
||||
first_va_range,
|
||||
first_managed_range,
|
||||
base,
|
||||
length,
|
||||
dest_id,
|
||||
@ -792,7 +785,7 @@ static void semaphore_release_from_cpu(uvm_mem_t *semaphore_mem, NvU64 semaphore
|
||||
|
||||
semaphore_cpu_va = (char *) uvm_mem_get_cpu_addr_kernel(semaphore_mem) + semaphore_offset;
|
||||
|
||||
UVM_WRITE_ONCE(*(NvU32 *)semaphore_cpu_va, semaphore_payload);
|
||||
WRITE_ONCE(*(NvU32 *)semaphore_cpu_va, semaphore_payload);
|
||||
}
|
||||
|
||||
static NV_STATUS semaphore_release(NvU64 semaphore_address,
|
||||
@ -872,7 +865,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
uvm_tracker_t tracker = UVM_TRACKER_INIT();
|
||||
uvm_tracker_t *tracker_ptr = NULL;
|
||||
uvm_gpu_t *dest_gpu = NULL;
|
||||
uvm_va_range_t *sema_va_range = NULL;
|
||||
uvm_va_range_semaphore_pool_t *sema_va_range = NULL;
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status = NV_OK;
|
||||
bool flush_events = false;
|
||||
@ -917,9 +910,9 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
}
|
||||
}
|
||||
else {
|
||||
sema_va_range = uvm_va_range_find(va_space, params->semaphoreAddress);
|
||||
sema_va_range = uvm_va_range_semaphore_pool_find(va_space, params->semaphoreAddress);
|
||||
if (!IS_ALIGNED(params->semaphoreAddress, sizeof(params->semaphorePayload)) ||
|
||||
!sema_va_range || sema_va_range->type != UVM_VA_RANGE_TYPE_SEMAPHORE_POOL) {
|
||||
!sema_va_range) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
goto done;
|
||||
}
|
||||
@ -980,18 +973,19 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
if (type == UVM_API_RANGE_TYPE_ATS) {
|
||||
uvm_migrate_args_t uvm_migrate_args =
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = params->base,
|
||||
.length = params->length,
|
||||
.dst_id = dest_id,
|
||||
.dst_node_id = cpu_numa_node,
|
||||
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
|
||||
.touch = false,
|
||||
.skip_mapped = false,
|
||||
.populate_on_cpu_alloc_failures = false,
|
||||
.user_space_start = ¶ms->userSpaceStart,
|
||||
.user_space_length = ¶ms->userSpaceLength,
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = params->base,
|
||||
.length = params->length,
|
||||
.dst_id = dest_id,
|
||||
.dst_node_id = cpu_numa_node,
|
||||
.populate_permissions = UVM_POPULATE_PERMISSIONS_INHERIT,
|
||||
.touch = false,
|
||||
.skip_mapped = false,
|
||||
.populate_on_cpu_alloc_failures = false,
|
||||
.populate_on_migrate_vma_failures = true,
|
||||
.user_space_start = ¶ms->userSpaceStart,
|
||||
.user_space_length = ¶ms->userSpaceLength,
|
||||
};
|
||||
|
||||
status = uvm_migrate_pageable(&uvm_migrate_args);
|
||||
@ -1004,7 +998,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
dest_id,
|
||||
(UVM_ID_IS_CPU(dest_id) ? cpu_numa_node : NUMA_NO_NODE),
|
||||
params->flags,
|
||||
uvm_va_space_iter_first(va_space, params->base, params->base),
|
||||
uvm_va_space_iter_managed_first(va_space, params->base, params->base),
|
||||
tracker_ptr);
|
||||
}
|
||||
}
|
||||
@ -1025,7 +1019,7 @@ done:
|
||||
if (params->semaphoreAddress && (status == NV_OK)) {
|
||||
status = semaphore_release(params->semaphoreAddress,
|
||||
params->semaphorePayload,
|
||||
&sema_va_range->semaphore_pool,
|
||||
sema_va_range,
|
||||
dest_gpu,
|
||||
tracker_ptr);
|
||||
}
|
||||
@ -1104,9 +1098,9 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
status = NV_ERR_OUT_OF_RANGE;
|
||||
}
|
||||
else {
|
||||
uvm_va_range_t *first_va_range = uvm_va_space_iter_first(va_space, start, start);
|
||||
uvm_va_range_managed_t *first_managed_range = uvm_va_space_iter_managed_first(va_space, start, start);
|
||||
|
||||
if (!first_va_range || first_va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
if (!first_managed_range) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
goto done;
|
||||
}
|
||||
@ -1118,7 +1112,7 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
dest_id,
|
||||
NUMA_NO_NODE,
|
||||
migrate_flags,
|
||||
first_va_range,
|
||||
first_managed_range,
|
||||
&local_tracker);
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
|
||||
uvm_gpu_address_t *gpu_addr)
|
||||
{
|
||||
uvm_va_space_t *va_space = state->uvm_migrate_args->va_space;
|
||||
uvm_gpu_t *owning_gpu = UVM_ID_IS_CPU(resident_id)? NULL: uvm_va_space_get_gpu(va_space, resident_id);
|
||||
uvm_gpu_t *owning_gpu = UVM_ID_IS_CPU(resident_id)? NULL: uvm_gpu_get(resident_id);
|
||||
const bool can_copy_from = uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(copying_gpu->id)],
|
||||
resident_id);
|
||||
|
||||
@ -111,8 +111,8 @@ static NV_STATUS migrate_vma_zero_begin_push(uvm_va_space_t *va_space,
|
||||
channel_type,
|
||||
push,
|
||||
"Zero %s from %s VMA region [0x%lx, 0x%lx]",
|
||||
uvm_va_space_processor_name(va_space, dst_id),
|
||||
uvm_va_space_processor_name(va_space, gpu->id),
|
||||
uvm_processor_get_name(dst_id),
|
||||
uvm_processor_get_name(gpu->id),
|
||||
start,
|
||||
outer);
|
||||
}
|
||||
@ -130,20 +130,20 @@ static NV_STATUS migrate_vma_copy_begin_push(uvm_va_space_t *va_space,
|
||||
|
||||
UVM_ASSERT_MSG(!uvm_id_equal(src_id, dst_id),
|
||||
"Unexpected copy to self, processor %s\n",
|
||||
uvm_va_space_processor_name(va_space, src_id));
|
||||
uvm_processor_get_name(src_id));
|
||||
|
||||
if (UVM_ID_IS_CPU(src_id)) {
|
||||
gpu = uvm_va_space_get_gpu(va_space, dst_id);
|
||||
gpu = uvm_gpu_get(dst_id);
|
||||
channel_type = UVM_CHANNEL_TYPE_CPU_TO_GPU;
|
||||
}
|
||||
else if (UVM_ID_IS_CPU(dst_id)) {
|
||||
gpu = uvm_va_space_get_gpu(va_space, src_id);
|
||||
gpu = uvm_gpu_get(src_id);
|
||||
channel_type = UVM_CHANNEL_TYPE_GPU_TO_CPU;
|
||||
}
|
||||
else {
|
||||
// For GPU to GPU copies, prefer to "push" the data from the source as
|
||||
// that works better
|
||||
gpu = uvm_va_space_get_gpu(va_space, src_id);
|
||||
gpu = uvm_gpu_get(src_id);
|
||||
|
||||
channel_type = UVM_CHANNEL_TYPE_GPU_TO_GPU;
|
||||
}
|
||||
@ -154,24 +154,24 @@ static NV_STATUS migrate_vma_copy_begin_push(uvm_va_space_t *va_space,
|
||||
if (!gpu->mem_info.numa.enabled) {
|
||||
UVM_ASSERT_MSG(uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(gpu->id)], dst_id),
|
||||
"GPU %s dst %s src %s\n",
|
||||
uvm_va_space_processor_name(va_space, gpu->id),
|
||||
uvm_va_space_processor_name(va_space, dst_id),
|
||||
uvm_va_space_processor_name(va_space, src_id));
|
||||
uvm_processor_get_name(gpu->id),
|
||||
uvm_processor_get_name(dst_id),
|
||||
uvm_processor_get_name(src_id));
|
||||
UVM_ASSERT_MSG(uvm_processor_mask_test(&va_space->can_copy_from[uvm_id_value(gpu->id)], src_id),
|
||||
"GPU %s dst %s src %s\n",
|
||||
uvm_va_space_processor_name(va_space, gpu->id),
|
||||
uvm_va_space_processor_name(va_space, dst_id),
|
||||
uvm_va_space_processor_name(va_space, src_id));
|
||||
uvm_processor_get_name(gpu->id),
|
||||
uvm_processor_get_name(dst_id),
|
||||
uvm_processor_get_name(src_id));
|
||||
}
|
||||
|
||||
if (channel_type == UVM_CHANNEL_TYPE_GPU_TO_GPU) {
|
||||
uvm_gpu_t *dst_gpu = uvm_va_space_get_gpu(va_space, dst_id);
|
||||
uvm_gpu_t *dst_gpu = uvm_gpu_get(dst_id);
|
||||
return uvm_push_begin_gpu_to_gpu(gpu->channel_manager,
|
||||
dst_gpu,
|
||||
push,
|
||||
"Copy from %s to %s for VMA region [0x%lx, 0x%lx]",
|
||||
uvm_va_space_processor_name(va_space, src_id),
|
||||
uvm_va_space_processor_name(va_space, dst_id),
|
||||
uvm_processor_get_name(src_id),
|
||||
uvm_processor_get_name(dst_id),
|
||||
start,
|
||||
outer);
|
||||
}
|
||||
@ -180,8 +180,8 @@ static NV_STATUS migrate_vma_copy_begin_push(uvm_va_space_t *va_space,
|
||||
channel_type,
|
||||
push,
|
||||
"Copy from %s to %s for VMA region [0x%lx, 0x%lx]",
|
||||
uvm_va_space_processor_name(va_space, src_id),
|
||||
uvm_va_space_processor_name(va_space, dst_id),
|
||||
uvm_processor_get_name(src_id),
|
||||
uvm_processor_get_name(dst_id),
|
||||
start,
|
||||
outer);
|
||||
}
|
||||
@ -356,7 +356,7 @@ static NV_STATUS migrate_vma_populate_anon_pages(struct vm_area_struct *vma,
|
||||
copying_gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
}
|
||||
else {
|
||||
copying_gpu = uvm_va_space_get_gpu(va_space, dst_id);
|
||||
copying_gpu = uvm_gpu_get(dst_id);
|
||||
}
|
||||
|
||||
UVM_ASSERT(copying_gpu);
|
||||
@ -928,7 +928,7 @@ static NV_STATUS migrate_pageable(migrate_vma_state_t *state)
|
||||
|
||||
status = migrate_pageable_vma(vma, start, outer, state, &next_addr);
|
||||
if (status == NV_WARN_NOTHING_TO_DO) {
|
||||
NV_STATUS populate_status = NV_OK;
|
||||
NV_STATUS populate_status;
|
||||
bool touch = uvm_migrate_args->touch;
|
||||
uvm_populate_permissions_t populate_permissions = uvm_migrate_args->populate_permissions;
|
||||
|
||||
@ -948,8 +948,16 @@ static NV_STATUS migrate_pageable(migrate_vma_state_t *state)
|
||||
if (current->mm != mm && !(current->flags & PF_KTHREAD))
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
// Populate pages with uvm_populate_pageable
|
||||
populate_status = uvm_populate_pageable_vma(vma, start, length, 0, touch, populate_permissions);
|
||||
// Populate pages with uvm_populate_pageable if requested.
|
||||
if (uvm_migrate_args->populate_on_migrate_vma_failures) {
|
||||
populate_status = uvm_populate_pageable_vma(vma, start, length, 0, touch, populate_permissions);
|
||||
}
|
||||
else {
|
||||
*user_space_start = start;
|
||||
*user_space_length = outer - start;
|
||||
populate_status = NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
if (populate_status == NV_OK) {
|
||||
*user_space_start = max(vma->vm_start, start);
|
||||
*user_space_length = min(vma->vm_end, outer) - *user_space_start;
|
||||
@ -983,7 +991,6 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
{
|
||||
migrate_vma_state_t *state = NULL;
|
||||
NV_STATUS status;
|
||||
uvm_va_space_t *va_space = uvm_migrate_args->va_space;
|
||||
uvm_processor_id_t dst_id = uvm_migrate_args->dst_id;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(uvm_migrate_args->start));
|
||||
@ -997,7 +1004,7 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
else {
|
||||
// Incoming dst_node_id is only valid if dst_id belongs to the CPU. Use
|
||||
// dst_node_id as the GPU node id if dst_id doesn't belong to the CPU.
|
||||
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(uvm_va_space_get_gpu(va_space, dst_id));
|
||||
uvm_migrate_args->dst_node_id = uvm_gpu_numa_node(uvm_gpu_get(dst_id));
|
||||
}
|
||||
|
||||
state = kmem_cache_alloc(g_uvm_migrate_vma_state_cache, NV_UVM_GFP_FLAGS);
|
||||
|
@ -44,6 +44,7 @@ typedef struct
|
||||
bool touch : 1;
|
||||
bool skip_mapped : 1;
|
||||
bool populate_on_cpu_alloc_failures : 1;
|
||||
bool populate_on_migrate_vma_failures : 1;
|
||||
NvU64 *user_space_start;
|
||||
NvU64 *user_space_length;
|
||||
} uvm_migrate_args_t;
|
||||
|
@ -50,18 +50,18 @@
|
||||
// because that type is normally associated with the LCE mapped to the most
|
||||
// PCEs. The higher bandwidth is beneficial when doing bulk operations such as
|
||||
// clearing PTEs, or initializing a page directory/table.
|
||||
#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
|
||||
NV_STATUS status; \
|
||||
uvm_channel_manager_t *manager = (tree)->gpu->channel_manager; \
|
||||
\
|
||||
if (manager == NULL) \
|
||||
status = uvm_push_begin_fake((tree)->gpu, (push)); \
|
||||
else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
|
||||
status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
|
||||
else \
|
||||
status = uvm_push_begin_acquire(manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
|
||||
\
|
||||
status; \
|
||||
#define page_tree_begin_acquire(tree, tracker, push, format, ...) ({ \
|
||||
NV_STATUS __status; \
|
||||
uvm_channel_manager_t *__manager = (tree)->gpu->channel_manager; \
|
||||
\
|
||||
if (__manager == NULL) \
|
||||
__status = uvm_push_begin_fake((tree)->gpu, (push)); \
|
||||
else if (uvm_parent_gpu_is_virt_mode_sriov_heavy((tree)->gpu->parent)) \
|
||||
__status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_MEMOPS, (tracker), (push), (format), ##__VA_ARGS__); \
|
||||
else \
|
||||
__status = uvm_push_begin_acquire(__manager, UVM_CHANNEL_TYPE_GPU_INTERNAL, (tracker), (push), (format), ##__VA_ARGS__);\
|
||||
\
|
||||
__status; \
|
||||
})
|
||||
|
||||
// Default location of page table allocations
|
||||
@ -2368,7 +2368,7 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
|
||||
peer_mapping = uvm_gpu_get_peer_mapping(gpu, peer->id);
|
||||
phys_offset = 0ULL;
|
||||
|
||||
if (uvm_gpus_are_nvswitch_connected(gpu, peer)) {
|
||||
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, peer->parent)) {
|
||||
// Add the 47-bit physical address routing bits for this peer to the
|
||||
// generated PTEs
|
||||
phys_offset = peer->parent->nvswitch_info.fabric_memory_window_start;
|
||||
@ -2658,7 +2658,7 @@ NV_STATUS uvm_mmu_chunk_map(uvm_gpu_chunk_t *chunk)
|
||||
// mappings are reference counted as multiples of PAGE_SIZE. User chunk
|
||||
// sizes are guaranteed to be a multiple of that page size, but kernel chunk
|
||||
// sizes can be smaller.
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(chunk_size));
|
||||
|
||||
@ -2705,7 +2705,7 @@ void uvm_mmu_chunk_unmap(uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker)
|
||||
chunk_size = uvm_gpu_chunk_get_size(chunk);
|
||||
num_unmapped_pages = chunk_size / PAGE_SIZE;
|
||||
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
UVM_ASSERT(PAGE_ALIGNED(chunk_size));
|
||||
|
||||
root_chunk_mapping = root_chunk_mapping_from_chunk(gpu, chunk);
|
||||
|
@ -114,7 +114,7 @@ typedef union
|
||||
|
||||
// Only one of these two can be set. The other one must be NULL
|
||||
uvm_va_block_t *block;
|
||||
uvm_va_range_t *range;
|
||||
uvm_va_range_managed_t *range;
|
||||
} module_unload;
|
||||
|
||||
struct
|
||||
@ -151,6 +151,8 @@ typedef union
|
||||
{
|
||||
NvU64 fault_va;
|
||||
|
||||
NvU32 cpu_num;
|
||||
|
||||
bool is_write;
|
||||
|
||||
NvU64 pc;
|
||||
@ -181,8 +183,8 @@ typedef union
|
||||
|
||||
// For CPU-to-CPU migrations, these two fields indicate the source
|
||||
// and destination NUMA node IDs.
|
||||
NvU16 dst_nid;
|
||||
NvU16 src_nid;
|
||||
NvS16 dst_nid;
|
||||
NvS16 src_nid;
|
||||
|
||||
// Start address of the memory range being migrated
|
||||
NvU64 address;
|
||||
@ -336,8 +338,8 @@ static inline void uvm_perf_event_notify_migration_cpu(uvm_perf_va_space_events_
|
||||
.block = va_block,
|
||||
.dst = UVM_ID_CPU,
|
||||
.src = UVM_ID_CPU,
|
||||
.src_nid = (NvU16)src_nid,
|
||||
.dst_nid = (NvU16)dst_nid,
|
||||
.src_nid = (NvS16)src_nid,
|
||||
.dst_nid = (NvS16)dst_nid,
|
||||
.address = address,
|
||||
.bytes = bytes,
|
||||
.transfer_mode = transfer_mode,
|
||||
@ -384,6 +386,7 @@ static inline void uvm_perf_event_notify_cpu_fault(uvm_perf_va_space_events_t *v
|
||||
uvm_processor_id_t preferred_location,
|
||||
NvU64 fault_va,
|
||||
bool is_write,
|
||||
NvU32 cpu_num,
|
||||
NvU64 pc)
|
||||
{
|
||||
uvm_perf_event_data_t event_data =
|
||||
@ -397,9 +400,10 @@ static inline void uvm_perf_event_notify_cpu_fault(uvm_perf_va_space_events_t *v
|
||||
}
|
||||
};
|
||||
|
||||
event_data.fault.cpu.fault_va = fault_va,
|
||||
event_data.fault.cpu.is_write = is_write,
|
||||
event_data.fault.cpu.pc = pc,
|
||||
event_data.fault.cpu.fault_va = fault_va;
|
||||
event_data.fault.cpu.is_write = is_write;
|
||||
event_data.fault.cpu.pc = pc;
|
||||
event_data.fault.cpu.cpu_num = cpu_num;
|
||||
|
||||
uvm_perf_event_notify(va_space_events, UVM_PERF_EVENT_FAULT, &event_data);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -78,7 +78,7 @@ error:
|
||||
void uvm_perf_module_unload(uvm_perf_module_t *module, uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_perf_event_data_t event_data;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_va_block_t *block;
|
||||
size_t i;
|
||||
|
||||
@ -89,12 +89,10 @@ void uvm_perf_module_unload(uvm_perf_module_t *module, uvm_va_space_t *va_space)
|
||||
|
||||
event_data.module_unload.module = module;
|
||||
|
||||
// Iterate over all va_range/va_blocks in the va_space
|
||||
uvm_for_each_va_range(va_range, va_space) {
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
continue;
|
||||
// Iterate over all managed ranges/va_blocks in the va_space
|
||||
uvm_for_each_va_range_managed(managed_range, va_space) {
|
||||
|
||||
for_each_va_block_in_va_range(va_range, block) {
|
||||
for_each_va_block_in_va_range(managed_range, block) {
|
||||
uvm_mutex_lock(&block->lock);
|
||||
|
||||
// Notify a fake va_block destruction to destroy the module-allocated data
|
||||
@ -106,7 +104,7 @@ void uvm_perf_module_unload(uvm_perf_module_t *module, uvm_va_space_t *va_space)
|
||||
}
|
||||
// Notify a fake va_range destruction to destroy the module-allocated data
|
||||
event_data.module_unload.block = NULL;
|
||||
event_data.module_unload.range = va_range;
|
||||
event_data.module_unload.range = managed_range;
|
||||
uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_MODULE_UNLOAD, &event_data);
|
||||
}
|
||||
|
||||
|
@ -260,7 +260,7 @@ static void update_bitmap_tree_from_va_block(uvm_perf_prefetch_bitmap_tree_t *bi
|
||||
// registered in the current process for this GPU.
|
||||
if (UVM_ID_IS_GPU(new_residency) &&
|
||||
uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, new_residency)) {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, new_residency);
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(new_residency);
|
||||
|
||||
big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -223,9 +223,9 @@ typedef struct
|
||||
// uvm_procfs_is_debug_enabled() returns true.
|
||||
static processor_thrashing_stats_t g_cpu_thrashing_stats;
|
||||
|
||||
#define PROCESSOR_THRASHING_STATS_INC(va_space, proc, field) \
|
||||
#define PROCESSOR_THRASHING_STATS_INC(proc, field) \
|
||||
do { \
|
||||
processor_thrashing_stats_t *_processor_stats = thrashing_stats_get_or_null(va_space, proc); \
|
||||
processor_thrashing_stats_t *_processor_stats = thrashing_stats_get_or_null(proc); \
|
||||
if (_processor_stats) \
|
||||
atomic64_inc(&_processor_stats->field); \
|
||||
} while (0)
|
||||
@ -474,7 +474,7 @@ static processor_thrashing_stats_t *gpu_thrashing_stats_get_or_null(uvm_gpu_t *g
|
||||
return uvm_perf_module_type_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
|
||||
}
|
||||
|
||||
static processor_thrashing_stats_t *thrashing_stats_get_or_null(uvm_va_space_t *va_space, uvm_processor_id_t id)
|
||||
static processor_thrashing_stats_t *thrashing_stats_get_or_null(uvm_processor_id_t id)
|
||||
{
|
||||
if (UVM_ID_IS_CPU(id)) {
|
||||
if (g_cpu_thrashing_stats.procfs_file)
|
||||
@ -483,7 +483,7 @@ static processor_thrashing_stats_t *thrashing_stats_get_or_null(uvm_va_space_t *
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return gpu_thrashing_stats_get_or_null(uvm_va_space_get_gpu(va_space, id));
|
||||
return gpu_thrashing_stats_get_or_null(uvm_gpu_get(id));
|
||||
}
|
||||
|
||||
// Create the thrashing stats struct for the given GPU
|
||||
@ -1034,7 +1034,7 @@ static void thrashing_detected(uvm_va_block_t *va_block,
|
||||
if (!uvm_page_mask_test_and_set(&block_thrashing->thrashing_pages, page_index))
|
||||
++block_thrashing->num_thrashing_pages;
|
||||
|
||||
PROCESSOR_THRASHING_STATS_INC(va_space, processor_id, num_thrashing);
|
||||
PROCESSOR_THRASHING_STATS_INC(processor_id, num_thrashing);
|
||||
|
||||
UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
|
||||
}
|
||||
@ -1269,7 +1269,7 @@ void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_
|
||||
// read duplication is supported.
|
||||
read_duplication = uvm_va_block_is_hmm(va_block) ?
|
||||
UVM_READ_DUPLICATION_UNSET :
|
||||
uvm_va_range_get_policy(va_block->va_range)->read_duplication;
|
||||
va_block->managed_range->policy.read_duplication;
|
||||
|
||||
// We only care about migrations due to replayable faults, access
|
||||
// counters and page prefetching. For non-replayable faults, UVM will
|
||||
@ -1405,7 +1405,16 @@ static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
|
||||
uvm_processor_mask_and(fast_to,
|
||||
&va_space->has_nvlink[uvm_id_value(to)],
|
||||
&va_space->has_native_atomics[uvm_id_value(to)]);
|
||||
uvm_processor_mask_set(fast_to, to);
|
||||
if (UVM_ID_IS_CPU(to)) {
|
||||
uvm_processor_mask_set(fast_to, to);
|
||||
}
|
||||
else {
|
||||
// Include registered SMC peers and the processor 'to'.
|
||||
uvm_processor_mask_range_fill(fast_to,
|
||||
uvm_gpu_id_from_sub_processor(uvm_parent_gpu_id_from_gpu_id(to), 0),
|
||||
UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
uvm_processor_mask_and(fast_to, fast_to, &va_space->registered_gpu_va_spaces);
|
||||
}
|
||||
|
||||
return uvm_processor_mask_subset(&page_thrashing->processors, fast_to);
|
||||
}
|
||||
@ -1491,10 +1500,10 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
else if (!preferred_location_is_thrashing(preferred_location, page_thrashing) &&
|
||||
thrashing_processors_have_fast_access_to(va_space, va_block_context, page_thrashing, closest_resident_id)){
|
||||
// This is a fast path for those scenarios in which all thrashing
|
||||
// processors have fast (NVLINK + native atomics) access to the current
|
||||
// residency. This is skipped if the preferred location is thrashing and
|
||||
// not accessible by the rest of thrashing processors. Otherwise, we
|
||||
// would be in the condition above.
|
||||
// processors have fast access (NVLINK + native atomics or SMC peers)
|
||||
// to the current residency. This is skipped if the preferred location
|
||||
// is thrashing and not accessible by the rest of thrashing processors.
|
||||
// Otherwise, we would be in the condition above.
|
||||
if (UVM_ID_IS_CPU(closest_resident_id)) {
|
||||
// On P9 systems, we prefer the CPU to map vidmem (since it can
|
||||
// cache it), so don't map the GPU to sysmem.
|
||||
@ -1577,8 +1586,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
hint.pin.residency = requester;
|
||||
}
|
||||
|
||||
if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN &&
|
||||
!uvm_va_space_processor_has_memory(va_space, hint.pin.residency))
|
||||
if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN && !uvm_processor_has_memory(hint.pin.residency))
|
||||
hint.pin.residency = UVM_ID_CPU;
|
||||
|
||||
return hint;
|
||||
@ -1741,9 +1749,9 @@ done:
|
||||
}
|
||||
else {
|
||||
if (uvm_id_equal(hint.pin.residency, requester))
|
||||
PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_local);
|
||||
PROCESSOR_THRASHING_STATS_INC(requester, num_pin_local);
|
||||
else
|
||||
PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_remote);
|
||||
PROCESSOR_THRASHING_STATS_INC(requester, num_pin_remote);
|
||||
|
||||
uvm_processor_mask_copy(&hint.pin.processors, &page_thrashing->processors);
|
||||
}
|
||||
@ -1756,7 +1764,7 @@ done:
|
||||
page_index,
|
||||
requester);
|
||||
|
||||
PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_throttle);
|
||||
PROCESSOR_THRASHING_STATS_INC(requester, num_throttle);
|
||||
|
||||
hint.throttle.end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);
|
||||
}
|
||||
@ -2102,15 +2110,12 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
|
||||
// When disabling thrashing detection, destroy the thrashing tracking
|
||||
// information for all VA blocks and unpin pages
|
||||
if (!va_space_thrashing->params.enable) {
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
|
||||
uvm_for_each_va_range(va_range, va_space) {
|
||||
uvm_for_each_va_range_managed(managed_range, va_space) {
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
continue;
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
for_each_va_block_in_va_range(managed_range, va_block) {
|
||||
uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
|
||||
|
@ -50,7 +50,7 @@
|
||||
// root_chunk_lock()) as synchronizing any pending operations might take a long
|
||||
// time and it would be undesirable for that to block other operations of PMM.
|
||||
// Notably some synchronization is required as part of allocation to handle GPU
|
||||
// lifetime issues across VA spaces (see comments in uvm_pmm_gpu_alloc()). Bit
|
||||
// lifetime issues across VA spaces (see comments in pmm_gpu_alloc()). Bit
|
||||
// locks (instead of a mutex in each root chunk) are used to save space.
|
||||
//
|
||||
// All free chunks (UVM_PMM_GPU_CHUNK_STATE_FREE) are kept on free lists, with
|
||||
@ -87,8 +87,9 @@
|
||||
// chunk become free, they are merged into one bigger chunk. See
|
||||
// free_chunk_with_merges().
|
||||
//
|
||||
// Splitting and merging already allocated chunks is also exposed to the users of
|
||||
// allocated chunks. See uvm_pmm_gpu_split_chunk() and uvm_pmm_gpu_merge_chunk().
|
||||
// Splitting and merging already allocated chunks is also exposed to the users
|
||||
// of allocated chunks. See uvm_pmm_gpu_split_chunk() and
|
||||
// uvm_pmm_gpu_merge_chunk().
|
||||
//
|
||||
// As splits and merges are protected by a single PMM mutex, they are only
|
||||
// performed when really necessary. See alloc_chunk() that falls back to split
|
||||
@ -170,6 +171,7 @@
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_linux.h"
|
||||
|
||||
@ -464,7 +466,7 @@ bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type)
|
||||
}
|
||||
}
|
||||
|
||||
static bool memory_type_is_protected(uvm_pmm_gpu_memory_type_t type)
|
||||
bool uvm_pmm_gpu_memory_type_is_protected(uvm_pmm_gpu_memory_type_t type)
|
||||
{
|
||||
switch (type) {
|
||||
case UVM_PMM_GPU_MEMORY_TYPE_USER: // Alias UVM_PMM_GPU_MEMORY_TYPE_USER_PROTECTED
|
||||
@ -477,8 +479,9 @@ static bool memory_type_is_protected(uvm_pmm_gpu_memory_type_t type)
|
||||
|
||||
static void uvm_gpu_chunk_set_in_eviction(uvm_gpu_chunk_t *chunk, bool in_eviction)
|
||||
{
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
|
||||
|
||||
chunk->in_eviction = in_eviction;
|
||||
}
|
||||
|
||||
@ -546,13 +549,13 @@ static uvm_pmm_gpu_memory_type_t pmm_squash_memory_type(uvm_pmm_gpu_memory_type_
|
||||
return UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t mem_type,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
static NV_STATUS pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t mem_type,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
NV_STATUS status;
|
||||
@ -620,16 +623,17 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t memory_type,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NV_STATUS status;
|
||||
size_t i;
|
||||
NV_STATUS status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
|
||||
|
||||
status = pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_KERNEL, flags, chunks, out_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -645,13 +649,23 @@ static NV_STATUS pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
|
||||
}
|
||||
|
||||
static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
|
||||
uvm_assert_spinlock_locked(&pmm->list_lock);
|
||||
|
||||
if (uvm_pmm_gpu_memory_type_is_user(chunk->type)) {
|
||||
if (uvm_gpu_chunk_is_user(chunk)) {
|
||||
if (chunk_is_root_chunk_pinned(pmm, chunk)) {
|
||||
UVM_ASSERT(root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT ||
|
||||
root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
@ -677,7 +691,7 @@ static void gpu_unpin_temp(uvm_pmm_gpu_t *pmm,
|
||||
bool is_referenced)
|
||||
{
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
|
||||
INIT_LIST_HEAD(&chunk->list);
|
||||
|
||||
@ -782,7 +796,8 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
uvm_gpu_chunk_t *child = chunk->suballoc->subchunks[i];
|
||||
|
||||
UVM_ASSERT(child->state == first_child->state);
|
||||
if (first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
|
||||
if ((first_child->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(first_child)) {
|
||||
uvm_gpu_chunk_t *prev_child = chunk->suballoc->subchunks[i-1];
|
||||
|
||||
UVM_ASSERT(child->va_block == child_va_block);
|
||||
@ -825,11 +840,12 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
// the subchunk state.
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
child_state = chunk->suballoc->subchunks[0]->state;
|
||||
subchunk = chunk->suballoc->subchunks[0];
|
||||
child_state = subchunk->state;
|
||||
|
||||
if (child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
subchunk = chunk->suballoc->subchunks[0];
|
||||
if ((child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(subchunk)) {
|
||||
UVM_ASSERT(subchunk->va_block);
|
||||
|
||||
chunk->va_block = subchunk->va_block;
|
||||
chunk->va_block_page_index = subchunk->va_block_page_index;
|
||||
chunk->is_referenced = subchunk->is_referenced;
|
||||
@ -845,7 +861,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
|
||||
// The resulting chunk is assumed to be non-zero as a simplification,
|
||||
// instead of checking that all the subchunks are zero, since callers of
|
||||
// uvm_pmm_gpu_alloc are not required to clear it. However, we think that
|
||||
// pmm_gpu_alloc are not required to clear it. However, we think that
|
||||
// this covers all relevant cases since it is uncommon to split a chunk and
|
||||
// not to use any of the subchunks later on.
|
||||
chunk->is_zero = false;
|
||||
@ -859,7 +875,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
// merge.
|
||||
UVM_ASSERT(list_empty(&subchunk->list));
|
||||
|
||||
if (child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED)
|
||||
if ((child_state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(subchunk))
|
||||
UVM_ASSERT(subchunk->va_block != NULL);
|
||||
|
||||
kmem_cache_free(CHUNK_CACHE, subchunk);
|
||||
@ -1191,7 +1207,7 @@ uvm_gpu_phys_address_t uvm_pmm_gpu_peer_phys_address(uvm_pmm_gpu_t *pmm,
|
||||
uvm_aperture_t aperture = uvm_gpu_peer_aperture(accessing_gpu, gpu);
|
||||
NvU64 addr;
|
||||
|
||||
if (uvm_gpus_are_nvswitch_connected(accessing_gpu, gpu))
|
||||
if (uvm_parent_gpus_are_nvswitch_connected(accessing_gpu->parent, gpu->parent))
|
||||
addr = chunk->address + gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
else
|
||||
addr = chunk->address;
|
||||
@ -1215,7 +1231,9 @@ uvm_gpu_address_t uvm_pmm_gpu_peer_copy_address(uvm_pmm_gpu_t *pmm,
|
||||
return uvm_gpu_address_virtual(gpu_peer_mapping->base + chunk->address);
|
||||
}
|
||||
|
||||
static NV_STATUS evict_root_chunk_from_va_block(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chunk_t *root_chunk, uvm_va_block_t *va_block)
|
||||
static NV_STATUS evict_root_chunk_from_va_block(uvm_pmm_gpu_t *pmm,
|
||||
uvm_gpu_root_chunk_t *root_chunk,
|
||||
uvm_va_block_t *va_block)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
NV_STATUS status;
|
||||
@ -1477,7 +1495,7 @@ static void root_chunk_update_eviction_list(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_MAX);
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
|
||||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
|
||||
@ -1797,8 +1815,8 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm,
|
||||
NvU32 i;
|
||||
uvm_gpu_chunk_t *parent;
|
||||
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == cur_size);
|
||||
UVM_ASSERT(chunk->type == type);
|
||||
UVM_ASSERT(uvm_gpu_chunk_get_size(chunk) == cur_size);
|
||||
UVM_ASSERT(chunk->type == type);
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
|
||||
if (chunk->parent) {
|
||||
@ -1953,8 +1971,7 @@ NV_STATUS alloc_root_chunk(uvm_pmm_gpu_t *pmm,
|
||||
//
|
||||
// TODO: Bug 2446832: Most (all?) kernel chunks don't require scrubbing.
|
||||
// Also, user pages that are about to be overwritten, don't need to be
|
||||
// zeroed, either. Add an interface to uvm_pmm_gpu_alloc for callers to
|
||||
// specify when they don't need zeroed pages.
|
||||
// zeroed, either. Add a flag to uvm_pmm_gpu_alloc_* to skip scrubbing.
|
||||
const bool skip_pma_scrubbing = gpu->mem_info.numa.enabled;
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(type) || uvm_pmm_gpu_memory_type_is_kernel(type));
|
||||
|
||||
@ -1973,7 +1990,7 @@ NV_STATUS alloc_root_chunk(uvm_pmm_gpu_t *pmm,
|
||||
|
||||
// When the Confidential Computing feature is enabled, allocate GPU memory
|
||||
// in the protected region, unless specified otherwise.
|
||||
if (g_uvm_global.conf_computing_enabled && memory_type_is_protected(type))
|
||||
if (g_uvm_global.conf_computing_enabled && uvm_pmm_gpu_memory_type_is_protected(type))
|
||||
options.flags |= UVM_PMA_ALLOCATE_PROTECTED_REGION;
|
||||
|
||||
if (!gpu->parent->rm_info.isSimulated &&
|
||||
@ -2063,7 +2080,8 @@ void free_root_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chunk_t *root_chunk, free_
|
||||
|
||||
status = uvm_tracker_wait_deinit(&root_chunk->tracker);
|
||||
if (status != NV_OK) {
|
||||
// TODO: Bug 1766184: Handle RC/ECC. For now just go ahead and free the chunk anyway.
|
||||
// TODO: Bug 1766184: Handle RC/ECC. For now just go ahead and free the
|
||||
// chunk anyway.
|
||||
UVM_ASSERT(uvm_global_get_status() != NV_OK);
|
||||
}
|
||||
|
||||
@ -2161,9 +2179,10 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
// The child inherits the parent's state.
|
||||
subchunk->state = chunk->state;
|
||||
|
||||
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
if ((chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) && uvm_gpu_chunk_is_user(chunk)) {
|
||||
UVM_ASSERT(chunk->va_block);
|
||||
uvm_assert_mutex_locked(&chunk->va_block->lock);
|
||||
|
||||
subchunk->va_block = chunk->va_block;
|
||||
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
|
||||
subchunk->is_referenced = chunk->is_referenced;
|
||||
@ -2185,7 +2204,8 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
chunk->is_referenced = false;
|
||||
}
|
||||
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
// -1 for the parent chunk that is going to transition into the split state.
|
||||
// -1 for the parent chunk that is going to transition into the split
|
||||
// state.
|
||||
root_chunk->chunk.suballoc->pinned_leaf_chunks += num_sub - 1;
|
||||
|
||||
// When a pinned root chunk gets split, the count starts at 0 not
|
||||
@ -2680,9 +2700,9 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
|
||||
|
||||
// Make sure that all pending allocations, that could have started before
|
||||
// the eviction callback was called, are done. This is required to guarantee
|
||||
// that any address that, PMA thinks, is owned by UVM has been indeed recorded
|
||||
// in PMM's state. Taking the pma_lock in write mode will make sure all
|
||||
// readers (pending allocations and frees) are done, but will also
|
||||
// that any address that, PMA thinks, is owned by UVM has been indeed
|
||||
// recorded in PMM's state. Taking the pma_lock in write mode will make sure
|
||||
// all readers (pending allocations and frees) are done, but will also
|
||||
// unnecessarily stop new allocations from starting until it's released.
|
||||
// TODO: Bug 1795559: SRCU would likely be better for this type of
|
||||
// synchronization, but that's GPL. Figure out whether we can do anything
|
||||
@ -2704,7 +2724,7 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
if (chunk->state != UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED) {
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
UVM_ASSERT(uvm_gpu_chunk_is_user(chunk));
|
||||
|
||||
if (chunk_is_evictable(pmm, chunk)) {
|
||||
chunk_start_eviction(pmm, chunk);
|
||||
@ -2722,7 +2742,7 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
|
||||
} while (!eviction_started && chunk->state != UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED);
|
||||
|
||||
// The eviction callback gets called with a physical range that might be
|
||||
// only partially allocated by UVM. Skip the chunks that UVM doesn't own.
|
||||
// only partially allocated by UVM. Skip the chunks that UVM doesn't own
|
||||
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_PMA_OWNED)
|
||||
continue;
|
||||
|
||||
@ -2998,9 +3018,9 @@ static NV_STATUS get_chunk_mappings_in_range(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t
|
||||
reverse_map = &get_chunk_mappings_data->mappings[get_chunk_mappings_data->num_mappings];
|
||||
|
||||
reverse_map->va_block = chunk->va_block;
|
||||
reverse_map->region = uvm_va_block_region(chunk->va_block_page_index,
|
||||
chunk->va_block_page_index + uvm_gpu_chunk_get_size(chunk) / PAGE_SIZE);
|
||||
reverse_map->owner = gpu->id;
|
||||
reverse_map->region = uvm_va_block_region(chunk->va_block_page_index,
|
||||
chunk->va_block_page_index + uvm_gpu_chunk_get_size(chunk) / PAGE_SIZE);
|
||||
reverse_map->owner = gpu->id;
|
||||
|
||||
// If we land in the middle of a chunk, adjust the offset
|
||||
if (get_chunk_mappings_data->phys_start > chunk->address) {
|
||||
@ -3039,9 +3059,9 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
|
||||
uvm_gpu_chunk_t *chunk = &root_chunk->chunk;
|
||||
get_chunk_mappings_data_t get_chunk_mappings_data;
|
||||
|
||||
get_chunk_mappings_data.phys_start = phys_addr;
|
||||
get_chunk_mappings_data.phys_end = phys_addr + size_in_chunk - 1;
|
||||
get_chunk_mappings_data.mappings = out_mappings + num_mappings;
|
||||
get_chunk_mappings_data.phys_start = phys_addr;
|
||||
get_chunk_mappings_data.phys_end = phys_addr + size_in_chunk - 1;
|
||||
get_chunk_mappings_data.mappings = out_mappings + num_mappings;
|
||||
get_chunk_mappings_data.num_mappings = 0;
|
||||
|
||||
// Walk the chunks for the current root chunk
|
||||
@ -3305,6 +3325,83 @@ static bool uvm_pmm_gpu_check_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
}
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
|
||||
static void device_p2p_page_free_wake(struct nv_kref *ref)
|
||||
{
|
||||
uvm_device_p2p_mem_t *p2p_mem = container_of(ref, uvm_device_p2p_mem_t, refcount);
|
||||
wake_up(&p2p_mem->waitq);
|
||||
}
|
||||
|
||||
static void device_p2p_page_free(struct page *page)
|
||||
{
|
||||
uvm_device_p2p_mem_t *p2p_mem = page->zone_device_data;
|
||||
|
||||
page->zone_device_data = NULL;
|
||||
nv_kref_put(&p2p_mem->refcount, device_p2p_page_free_wake);
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops uvm_device_p2p_pgmap_ops =
|
||||
{
|
||||
.page_free = device_p2p_page_free,
|
||||
};
|
||||
|
||||
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu)
|
||||
{
|
||||
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
|
||||
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
|
||||
unsigned long pci_end_pfn = pci_start_pfn + (gpu->mem_info.static_bar1_size >> PAGE_SHIFT);
|
||||
struct page *p2p_page;
|
||||
|
||||
gpu->device_p2p_initialised = false;
|
||||
uvm_mutex_init(&gpu->device_p2p_lock, UVM_LOCK_ORDER_GLOBAL);
|
||||
|
||||
if (uvm_parent_gpu_is_coherent(gpu->parent)) {
|
||||
// A coherent system uses normal struct pages.
|
||||
gpu->device_p2p_initialised = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// RM sets this when it has created a contiguous BAR mapping large enough to
|
||||
// cover all of GPU memory that will be allocated to userspace buffers. This
|
||||
// is required to support the P2PDMA feature to ensure we have a P2PDMA page
|
||||
// available for every mapping.
|
||||
if (!gpu->mem_info.static_bar1_size)
|
||||
return;
|
||||
|
||||
if (pci_p2pdma_add_resource(gpu->parent->pci_dev, uvm_device_p2p_static_bar(gpu), 0, 0)) {
|
||||
UVM_ERR_PRINT("Unable to initialse PCI P2PDMA pages\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// The current upstream PCIe P2PDMA architecture does not allow drivers to
|
||||
// specify a page_free callback. We plan to work with upstream maintainers
|
||||
// to resolve this but in the mean time we can work around the issue by
|
||||
// overwriting the existing dev_pagemap_ops struct with our own.
|
||||
// TODO: Bug 4672502: [Linux Upstream][UVM] Allow drivers to manage and
|
||||
// allocate PCI P2PDMA pages directly
|
||||
p2p_page = pfn_to_page(pci_start_pfn);
|
||||
p2p_page->pgmap->ops = &uvm_device_p2p_pgmap_ops;
|
||||
for (; page_to_pfn(p2p_page) < pci_end_pfn; p2p_page++)
|
||||
p2p_page->zone_device_data = NULL;
|
||||
|
||||
gpu->device_p2p_initialised = true;
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu)
|
||||
{
|
||||
unsigned long pci_start_pfn = pci_resource_start(gpu->parent->pci_dev,
|
||||
uvm_device_p2p_static_bar(gpu)) >> PAGE_SHIFT;
|
||||
struct page *p2p_page;
|
||||
|
||||
if (gpu->device_p2p_initialised && !uvm_parent_gpu_is_coherent(gpu->parent)) {
|
||||
p2p_page = pfn_to_page(pci_start_pfn);
|
||||
devm_memunmap_pages(&gpu->parent->pci_dev->dev, p2p_page->pgmap);
|
||||
}
|
||||
|
||||
gpu->device_p2p_initialised = false;
|
||||
}
|
||||
#endif // CONFIG_PCI_P2PDMA
|
||||
|
||||
static void process_lazy_free(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
@ -3346,8 +3443,8 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t i, j, k;
|
||||
|
||||
// UVM_CHUNK_SIZE_INVALID is UVM_CHUNK_SIZE_MAX shifted left by 1. This protects
|
||||
// UVM_CHUNK_SIZE_INVALID from being negative
|
||||
// UVM_CHUNK_SIZE_INVALID is UVM_CHUNK_SIZE_MAX shifted left by 1. This
|
||||
// protects UVM_CHUNK_SIZE_INVALID from being negative
|
||||
BUILD_BUG_ON(UVM_CHUNK_SIZE_MAX >= UVM_CHUNK_SIZE_INVALID);
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
@ -3637,7 +3734,8 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
|
||||
|
||||
if (!root_chunk) {
|
||||
// Not finding a chunk to evict is not considered an error, the caller
|
||||
// can inspect the targeted_chunk_size to see whether anything was evicted.
|
||||
// can inspect the targeted_chunk_size to see whether anything was
|
||||
// evicted.
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -3860,8 +3958,8 @@ NV_STATUS uvm_test_pmm_query_pma_stats(UVM_TEST_PMM_QUERY_PMA_STATS_PARAMS *para
|
||||
if (!gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
params->pma_stats.numFreePages64k = UVM_READ_ONCE(gpu->pmm.pma_stats->numFreePages64k);
|
||||
params->pma_stats.numFreePages2m = UVM_READ_ONCE(gpu->pmm.pma_stats->numFreePages2m);
|
||||
params->pma_stats.numFreePages64k = READ_ONCE(gpu->pmm.pma_stats->numFreePages64k);
|
||||
params->pma_stats.numFreePages2m = READ_ONCE(gpu->pmm.pma_stats->numFreePages2m);
|
||||
|
||||
uvm_gpu_release(gpu);
|
||||
return NV_OK;
|
||||
|
@ -59,7 +59,7 @@
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_types.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
#if UVM_IS_CONFIG_HMM() || defined(CONFIG_PCI_P2PDMA)
|
||||
#include <linux/memremap.h>
|
||||
#endif
|
||||
|
||||
@ -126,6 +126,8 @@ static bool uvm_pmm_gpu_memory_type_is_kernel(uvm_pmm_gpu_memory_type_t type)
|
||||
return !uvm_pmm_gpu_memory_type_is_user(type);
|
||||
}
|
||||
|
||||
bool uvm_pmm_gpu_memory_type_is_protected(uvm_pmm_gpu_memory_type_t type);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Chunk belongs to PMA. Code outside PMM should not have access to
|
||||
@ -144,11 +146,13 @@ typedef enum
|
||||
|
||||
// Chunk is temporarily pinned.
|
||||
//
|
||||
// This state is used for user memory chunks that have been allocated, but haven't
|
||||
// been unpinned yet and also internally when a chunk is about to be split.
|
||||
// This state is used for user memory chunks that have been allocated, but
|
||||
// haven't been unpinned yet and also internally when a chunk is about to be
|
||||
// split.
|
||||
UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED,
|
||||
|
||||
// Chunk is allocated. That is it is backing some VA block
|
||||
// Chunk is allocated. In the case of a user chunk, this state implies that
|
||||
// the chunk is backing a VA block.
|
||||
UVM_PMM_GPU_CHUNK_STATE_ALLOCATED,
|
||||
|
||||
// Number of states - MUST BE LAST
|
||||
@ -173,7 +177,6 @@ typedef enum
|
||||
UVM_PMM_ALLOC_FLAGS_MASK = (1 << 2) - 1
|
||||
} uvm_pmm_alloc_flags_t;
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Identifier for lists with zeroed chunks
|
||||
@ -227,6 +230,16 @@ unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *ch
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PCI_P2PDMA) && defined(NV_STRUCT_PAGE_HAS_ZONE_DEVICE_DATA)
|
||||
#include <linux/pci-p2pdma.h>
|
||||
|
||||
void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu);
|
||||
void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu);
|
||||
#else
|
||||
static inline void uvm_pmm_gpu_device_p2p_init(uvm_gpu_t *gpu) {}
|
||||
static inline void uvm_pmm_gpu_device_p2p_deinit(uvm_gpu_t *gpu) {}
|
||||
#endif
|
||||
|
||||
struct uvm_gpu_chunk_struct
|
||||
{
|
||||
// Physical address of GPU chunk. This may be removed to save memory
|
||||
@ -251,14 +264,16 @@ struct uvm_gpu_chunk_struct
|
||||
|
||||
// This flag is initalized when allocating a new root chunk from PMA.
|
||||
// It is set to true, if PMA already scrubbed the chunk. The flag is
|
||||
// only valid at allocation time (after uvm_pmm_gpu_alloc call), and
|
||||
// only valid at allocation time (after uvm_pmm_gpu_alloc_* call), and
|
||||
// the caller is not required to clear it before freeing the chunk. The
|
||||
// VA block chunk population code can query it to skip zeroing the
|
||||
// chunk.
|
||||
bool is_zero : 1;
|
||||
|
||||
// This flag indicates an allocated chunk is referenced by a device
|
||||
// This flag indicates an allocated user chunk is referenced by a device
|
||||
// private struct page PTE and therefore expects a page_free() callback.
|
||||
//
|
||||
// This field is always false in kernel chunks.
|
||||
bool is_referenced : 1;
|
||||
|
||||
uvm_pmm_gpu_chunk_state_t state : order_base_2(UVM_PMM_GPU_CHUNK_STATE_COUNT + 1);
|
||||
@ -286,6 +301,8 @@ struct uvm_gpu_chunk_struct
|
||||
// The VA block using the chunk, if any.
|
||||
// User chunks that are not backed by a VA block are considered to be
|
||||
// temporarily pinned and cannot be evicted.
|
||||
//
|
||||
// This field is always NULL in kernel chunks.
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
// If this is subchunk it points to the parent - in other words
|
||||
@ -403,6 +420,12 @@ static void uvm_gpu_chunk_set_size(uvm_gpu_chunk_t *chunk, uvm_chunk_size_t size
|
||||
// use it if the owning GPU is retained.
|
||||
uvm_gpu_t *uvm_gpu_chunk_get_gpu(const uvm_gpu_chunk_t *chunk);
|
||||
|
||||
// Returns true if the memory type of the chunk is a user type.
|
||||
static bool uvm_gpu_chunk_is_user(const uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
return uvm_pmm_gpu_memory_type_is_user(chunk->type);
|
||||
}
|
||||
|
||||
// Return the first struct page corresponding to the physical address range
|
||||
// of the given chunk.
|
||||
//
|
||||
@ -412,7 +435,10 @@ uvm_gpu_t *uvm_gpu_chunk_get_gpu(const uvm_gpu_chunk_t *chunk);
|
||||
// page containing the chunk's starting address.
|
||||
struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
||||
|
||||
// Allocates num_chunks chunks of size chunk_size in caller-supplied array (chunks).
|
||||
// User memory allocator.
|
||||
//
|
||||
// Allocates num_chunks chunks of size chunk_size in caller-supplied array
|
||||
// (chunks).
|
||||
//
|
||||
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
|
||||
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
|
||||
@ -432,47 +458,24 @@ struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
||||
// If the memory returned by the PMM allocator cannot be physically addressed,
|
||||
// the MMU interface provides user chunk mapping and unmapping functions
|
||||
// (uvm_mmu_chunk_map/unmap) that enable virtual addressing.
|
||||
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t mem_type,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker);
|
||||
NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Helper for allocating kernel memory
|
||||
// Kernel memory allocator.
|
||||
//
|
||||
// Internally calls uvm_pmm_gpu_alloc() and sets the state of all chunks to
|
||||
// allocated on success.
|
||||
//
|
||||
// If Confidential Computing is enabled, this helper allocates protected kernel
|
||||
// memory.
|
||||
static NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_KERNEL, flags, chunks, out_tracker);
|
||||
}
|
||||
|
||||
// Helper for allocating user memory
|
||||
//
|
||||
// Simple wrapper that just uses UVM_PMM_GPU_MEMORY_TYPE_USER for the memory
|
||||
// type.
|
||||
//
|
||||
// If Confidential Computing is enabled, this helper allocates protected user
|
||||
// memory.
|
||||
static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
|
||||
}
|
||||
// See uvm_pmm_gpu_alloc_user documentation for details on the behavior of this
|
||||
// function, with one exception: the returned kernel chunks are in the ALLOCATED
|
||||
// state.
|
||||
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
|
||||
// mark it as allocated.
|
||||
@ -486,7 +489,7 @@ void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm
|
||||
// Can only be used on user memory.
|
||||
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
||||
|
||||
// Frees the chunk. This also unpins the chunk if it is temporarily pinned.
|
||||
// Free a user or kernel chunk. Temporarily pinned chunks are unpinned.
|
||||
//
|
||||
// The tracker is optional and a NULL tracker indicates that no new operation
|
||||
// has been pushed for the chunk, but the tracker returned as part of
|
||||
@ -542,7 +545,8 @@ size_t uvm_pmm_gpu_get_subchunks(uvm_pmm_gpu_t *pmm,
|
||||
// leaf children must be allocated.
|
||||
void uvm_pmm_gpu_merge_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
||||
|
||||
// Waits for all free chunk trackers (removing their completed entries) to complete.
|
||||
// Waits for all free chunk trackers (removing their completed entries) to
|
||||
// complete.
|
||||
//
|
||||
// This inherently races with any chunks being freed to this PMM. The assumption
|
||||
// is that the caller doesn't care about preventing new chunks from being freed,
|
||||
|
@ -126,7 +126,7 @@ NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sys
|
||||
NvU64 remove_key;
|
||||
|
||||
for (remove_key = base_key; remove_key < key; ++remove_key)
|
||||
(void *)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
|
||||
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
|
||||
|
||||
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
|
||||
status = errno_to_nv_status(ret);
|
||||
@ -455,7 +455,7 @@ static NvU32 compute_gpu_mappings_entry_index(uvm_parent_processor_mask_t *dma_a
|
||||
// above and including the id and then counting the number of bits
|
||||
// remaining.
|
||||
uvm_parent_processor_mask_zero(&subset_mask);
|
||||
bitmap_set(subset_mask.bitmap, UVM_PARENT_ID_GPU0_VALUE, uvm_parent_id_gpu_index(id));
|
||||
uvm_parent_processor_mask_range_fill(&subset_mask, uvm_parent_gpu_id_from_index(0), uvm_parent_id_gpu_index(id));
|
||||
uvm_parent_processor_mask_and(&subset_mask, dma_addrs_mask, &subset_mask);
|
||||
|
||||
return uvm_parent_processor_mask_get_gpu_count(&subset_mask);
|
||||
|
@ -771,7 +771,7 @@ static NV_STATUS test_cpu_chunk_mig(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
NvU64 dma_addr_gpu0;
|
||||
|
||||
UVM_ASSERT(gpu0->parent == gpu1->parent);
|
||||
UVM_ASSERT(uvm_gpus_are_smc_peers(gpu0, gpu1));
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, NUMA_NO_NODE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
@ -1379,7 +1379,7 @@ static void find_shared_gpu_pair(const uvm_processor_mask_t *test_gpus,
|
||||
uvm_gpu_t *gpu1 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu0);
|
||||
|
||||
while (gpu1) {
|
||||
if (gpu0->parent == gpu1->parent) {
|
||||
if (uvm_gpus_are_smc_peers(gpu0, gpu1)) {
|
||||
*out_gpu0 = gpu0;
|
||||
*out_gpu1 = gpu1;
|
||||
return;
|
||||
|
@ -112,6 +112,15 @@ static uvm_pmm_gpu_memory_type_t pmm_squash_memory_type(uvm_pmm_gpu_memory_type_
|
||||
return UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
|
||||
}
|
||||
|
||||
// Validate the chunk state upon allocation.
|
||||
static bool gpu_chunk_allocation_state_is_valid(uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
if (uvm_gpu_chunk_is_user(chunk))
|
||||
return chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED;
|
||||
else
|
||||
return chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED;
|
||||
}
|
||||
|
||||
// Verify that the input chunks are in the correct state following alloc
|
||||
static NV_STATUS check_chunks(uvm_gpu_chunk_t **chunks,
|
||||
size_t num_chunks,
|
||||
@ -121,12 +130,13 @@ static NV_STATUS check_chunks(uvm_gpu_chunk_t **chunks,
|
||||
size_t i;
|
||||
|
||||
mem_type = pmm_squash_memory_type(mem_type);
|
||||
|
||||
for (i = 0; i < num_chunks; i++) {
|
||||
TEST_CHECK_RET(chunks[i]);
|
||||
TEST_CHECK_RET(chunks[i]->suballoc == NULL);
|
||||
TEST_CHECK_RET(chunks[i]->type == mem_type);
|
||||
TEST_CHECK_RET(chunks[i]->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
TEST_CHECK_RET(uvm_gpu_chunk_get_size(chunks[i]) == chunk_size);
|
||||
TEST_CHECK_RET(chunks[i]->type == mem_type);
|
||||
TEST_CHECK_RET(gpu_chunk_allocation_state_is_valid(chunks[i]));
|
||||
TEST_CHECK_RET(uvm_gpu_chunk_get_size(chunks[i]) == chunk_size);
|
||||
TEST_CHECK_RET(IS_ALIGNED(chunks[i]->address, chunk_size));
|
||||
}
|
||||
|
||||
@ -198,7 +208,18 @@ static NV_STATUS chunk_alloc_check(uvm_pmm_gpu_t *pmm,
|
||||
if (gpu->mem_info.size == 0)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, mem_type, flags, chunks, &local_tracker);
|
||||
// TODO: Bug 4287430: the calls to uvm_pmm_gpu_alloc_* request protected
|
||||
// memory, ignoring the protection type in mem_type. But unprotected memory
|
||||
// is currently not used in UVM, so the default protection (protected) is
|
||||
// correct.
|
||||
mem_type = pmm_squash_memory_type(mem_type);
|
||||
TEST_CHECK_RET(uvm_pmm_gpu_memory_type_is_protected(mem_type));
|
||||
|
||||
if (uvm_pmm_gpu_memory_type_is_user(mem_type))
|
||||
status = uvm_pmm_gpu_alloc_user(pmm, num_chunks, chunk_size, flags, chunks, &local_tracker);
|
||||
else
|
||||
status = uvm_pmm_gpu_alloc_kernel(pmm, num_chunks, chunk_size, flags, chunks, &local_tracker);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -216,14 +237,25 @@ static NV_STATUS chunk_alloc_user_check(uvm_pmm_gpu_t *pmm,
|
||||
NV_STATUS status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, mem_type, flags, chunks, &local_tracker);
|
||||
// TODO: Bug 4287430: the call to uvm_pmm_gpu_alloc_user requests protected
|
||||
// memory, ignoring the protection type in mem_type. But unprotected memory
|
||||
// is currently not used in UVM, so the default protection (protected) is
|
||||
// correct.
|
||||
mem_type = pmm_squash_memory_type(mem_type);
|
||||
TEST_CHECK_RET(uvm_pmm_gpu_memory_type_is_protected(mem_type));
|
||||
|
||||
status = uvm_pmm_gpu_alloc_user(pmm, num_chunks, chunk_size, flags, chunks, &local_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
return chunk_alloc_check_common(pmm, num_chunks, chunk_size, mem_type, flags, chunks, &local_tracker, tracker);
|
||||
}
|
||||
|
||||
static NV_STATUS check_leak(uvm_gpu_t *gpu, uvm_chunk_size_t chunk_size, uvm_pmm_gpu_memory_type_t type, NvS64 limit, NvU64 *chunks)
|
||||
static NV_STATUS check_leak(uvm_gpu_t *gpu,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t type,
|
||||
NvS64 limit,
|
||||
NvU64 *chunks)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
pmm_leak_bucket_t *bucket, *next;
|
||||
@ -702,9 +734,9 @@ static NV_STATUS split_test_single(uvm_pmm_gpu_t *pmm,
|
||||
TEST_CHECK_GOTO(split_chunks[i], error);
|
||||
TEST_CHECK_GOTO(split_chunks[i]->address == parent_addr + i * child_size, error);
|
||||
TEST_CHECK_GOTO(split_chunks[i]->suballoc == NULL, error);
|
||||
TEST_CHECK_GOTO(split_chunks[i]->type == parent_type, error);
|
||||
TEST_CHECK_GOTO(split_chunks[i]->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED, error);
|
||||
TEST_CHECK_GOTO(uvm_gpu_chunk_get_size(split_chunks[i]) == child_size, error);
|
||||
TEST_CHECK_GOTO(split_chunks[i]->type == parent_type, error);
|
||||
TEST_CHECK_GOTO(gpu_chunk_allocation_state_is_valid(split_chunks[i]), error);
|
||||
TEST_CHECK_GOTO(uvm_gpu_chunk_get_size(split_chunks[i]) == child_size, error);
|
||||
}
|
||||
|
||||
status = get_subchunks_test(pmm, temp_chunk, split_chunks, num_children);
|
||||
@ -714,9 +746,11 @@ static NV_STATUS split_test_single(uvm_pmm_gpu_t *pmm,
|
||||
if (mode == SPLIT_TEST_MODE_MERGE) {
|
||||
parent->chunk = temp_chunk;
|
||||
uvm_pmm_gpu_merge_chunk(pmm, parent->chunk);
|
||||
|
||||
TEST_CHECK_GOTO(parent->chunk->address == parent_addr, error);
|
||||
TEST_CHECK_GOTO(parent->chunk->suballoc == NULL, error);
|
||||
TEST_CHECK_GOTO(parent->chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED, error);
|
||||
TEST_CHECK_GOTO(gpu_chunk_allocation_state_is_valid(parent->chunk), error);
|
||||
|
||||
status = destroy_test_chunk(pmm, parent, verif_mem);
|
||||
}
|
||||
else {
|
||||
@ -1080,7 +1114,7 @@ static NV_STATUS test_pmm_reverse_map_single(uvm_gpu_t *gpu, uvm_va_space_t *va_
|
||||
|
||||
static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t *va_space, NvU64 addr, NvU64 size)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
uvm_va_block_context_t *va_block_context = NULL;
|
||||
NvU32 num_blocks;
|
||||
@ -1089,12 +1123,12 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
bool is_resident;
|
||||
|
||||
// In this test, the [addr:addr + size) VA region contains
|
||||
// several VA ranges with different sizes.
|
||||
// several managed ranges with different sizes.
|
||||
|
||||
// Find the first block to compute the base physical address of the root
|
||||
// chunk
|
||||
uvm_for_each_va_range_in(va_range, va_space, addr, addr + size - 1) {
|
||||
va_block = uvm_va_range_block(va_range, 0);
|
||||
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
|
||||
va_block = uvm_va_range_block(managed_range, 0);
|
||||
if (va_block)
|
||||
break;
|
||||
}
|
||||
@ -1121,15 +1155,13 @@ static NV_STATUS test_pmm_reverse_map_many_blocks(uvm_gpu_t *gpu, uvm_va_space_t
|
||||
num_blocks = uvm_pmm_gpu_phys_to_virt(&gpu->pmm, phys_addr.address, size, g_reverse_map_entries);
|
||||
TEST_CHECK_RET(num_blocks != 0);
|
||||
|
||||
// Iterate over all VA ranges and their VA blocks within the 2MB VA region.
|
||||
// Some blocks are not populated. However, we assume that blocks have been
|
||||
// populated in order so they have been assigned physical addresses
|
||||
// incrementally. Therefore, the reverse translations will show them in
|
||||
// order.
|
||||
uvm_for_each_va_range_in(va_range, va_space, addr, addr + size - 1) {
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
// Iterate over all managed ranges and their VA blocks within the 2MB VA
|
||||
// region. Some blocks are not populated. However, we assume that blocks
|
||||
// have been populated in order so they have been assigned physical
|
||||
// addresses incrementally. Therefore, the reverse translations will show
|
||||
// them in order.
|
||||
uvm_for_each_va_range_managed_in(managed_range, va_space, addr, addr + size - 1) {
|
||||
for_each_va_block_in_va_range(managed_range, va_block) {
|
||||
NvU32 num_va_block_pages = 0;
|
||||
|
||||
// Iterate over all the translations for the current VA block. One
|
||||
|
@ -53,7 +53,7 @@ static bool uvm_is_valid_vma_range(struct mm_struct *mm, NvU64 start, NvU64 leng
|
||||
|
||||
uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct mm_struct *mm, NvU64 base, NvU64 length)
|
||||
{
|
||||
uvm_va_range_t *va_range, *va_range_last;
|
||||
uvm_va_range_managed_t *managed_range, *managed_range_last;
|
||||
const NvU64 last_address = base + length - 1;
|
||||
|
||||
if (mm)
|
||||
@ -81,17 +81,17 @@ uvm_api_range_type_t uvm_api_range_type_check(uvm_va_space_t *va_space, struct m
|
||||
}
|
||||
}
|
||||
|
||||
va_range_last = NULL;
|
||||
managed_range_last = NULL;
|
||||
|
||||
uvm_for_each_managed_va_range_in_contig(va_range, va_space, base, last_address)
|
||||
va_range_last = va_range;
|
||||
uvm_for_each_va_range_managed_in_contig(managed_range, va_space, base, last_address)
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// Check if passed interval overlaps with an unmanaged VA range, or a
|
||||
// sub-interval not tracked by a VA range
|
||||
if (!va_range_last || va_range_last->node.end < last_address)
|
||||
// sub-interval not tracked by a managed range
|
||||
if (!managed_range_last || managed_range_last->va_range.node.end < last_address)
|
||||
return UVM_API_RANGE_TYPE_INVALID;
|
||||
|
||||
// Passed interval is fully covered by managed VA ranges
|
||||
// Passed interval is fully covered by managed ranges
|
||||
return UVM_API_RANGE_TYPE_MANAGED;
|
||||
}
|
||||
|
||||
@ -100,6 +100,7 @@ static NV_STATUS split_as_needed(uvm_va_space_t *va_space,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
void *data)
|
||||
{
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_va_range_t *va_range;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(addr));
|
||||
@ -113,12 +114,14 @@ static NV_STATUS split_as_needed(uvm_va_space_t *va_space,
|
||||
if (addr == va_range->node.start)
|
||||
return NV_OK;
|
||||
|
||||
managed_range = uvm_va_range_to_managed_or_null(va_range);
|
||||
|
||||
// Only managed ranges can be split.
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
if (!managed_range)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
if (split_needed_cb(uvm_va_range_get_policy(va_range), data))
|
||||
return uvm_va_range_split(va_range, addr - 1, NULL);
|
||||
if (split_needed_cb(&managed_range->policy, data))
|
||||
return uvm_va_range_split(managed_range, addr - 1, NULL);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
@ -228,10 +231,10 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t preferred_location,
|
||||
int preferred_cpu_nid,
|
||||
uvm_va_range_t **first_va_range_to_migrate,
|
||||
uvm_va_range_managed_t **first_managed_range_to_migrate,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_va_range_t *va_range, *va_range_last;
|
||||
uvm_va_range_managed_t *managed_range, *managed_range_last;
|
||||
const NvU64 last_address = base + length - 1;
|
||||
bool preferred_location_is_faultable_gpu = false;
|
||||
preferred_location_split_params_t split_params;
|
||||
@ -240,7 +243,7 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
if (UVM_ID_IS_VALID(preferred_location)) {
|
||||
*first_va_range_to_migrate = NULL;
|
||||
*first_managed_range_to_migrate = NULL;
|
||||
preferred_location_is_faultable_gpu = UVM_ID_IS_GPU(preferred_location) &&
|
||||
uvm_processor_mask_test(&va_space->faultable_processors,
|
||||
preferred_location);
|
||||
@ -256,19 +259,19 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
va_range_last = NULL;
|
||||
uvm_for_each_managed_va_range_in_contig(va_range, va_space, base, last_address) {
|
||||
managed_range_last = NULL;
|
||||
uvm_for_each_va_range_managed_in_contig(managed_range, va_space, base, last_address) {
|
||||
bool found_non_migratable_interval = false;
|
||||
|
||||
va_range_last = va_range;
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// If we didn't split the ends, check that they match
|
||||
if (va_range->node.start < base || va_range->node.end > last_address)
|
||||
UVM_ASSERT(uvm_id_equal(uvm_va_range_get_policy(va_range)->preferred_location, preferred_location));
|
||||
if (managed_range->va_range.node.start < base || managed_range->va_range.node.end > last_address)
|
||||
UVM_ASSERT(uvm_id_equal(managed_range->policy.preferred_location, preferred_location));
|
||||
|
||||
if (UVM_ID_IS_VALID(preferred_location)) {
|
||||
const NvU64 start = max(base, va_range->node.start);
|
||||
const NvU64 end = min(last_address, va_range->node.end);
|
||||
const NvU64 start = max(base, managed_range->va_range.node.start);
|
||||
const NvU64 end = min(last_address, managed_range->va_range.node.end);
|
||||
|
||||
found_non_migratable_interval = !uvm_range_group_all_migratable(va_space, start, end);
|
||||
|
||||
@ -276,18 +279,22 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
}
|
||||
|
||||
status = uvm_va_range_set_preferred_location(va_range, preferred_location, preferred_cpu_nid, mm, out_tracker);
|
||||
status = uvm_va_range_set_preferred_location(managed_range,
|
||||
preferred_location,
|
||||
preferred_cpu_nid,
|
||||
mm,
|
||||
out_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// Return the first VA range that needs to be migrated so the caller
|
||||
// function doesn't need to traverse the tree again
|
||||
if (found_non_migratable_interval && (*first_va_range_to_migrate == NULL))
|
||||
*first_va_range_to_migrate = va_range;
|
||||
// Return the first managed range that needs to be migrated so the
|
||||
// caller function doesn't need to traverse the tree again
|
||||
if (found_non_migratable_interval && (*first_managed_range_to_migrate == NULL))
|
||||
*first_managed_range_to_migrate = managed_range;
|
||||
}
|
||||
|
||||
if (va_range_last) {
|
||||
UVM_ASSERT(va_range_last->node.end >= last_address);
|
||||
if (managed_range_last) {
|
||||
UVM_ASSERT(managed_range_last->va_range.node.end >= last_address);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
@ -308,8 +315,8 @@ NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS
|
||||
NV_STATUS tracker_status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_range_t *va_range = NULL;
|
||||
uvm_va_range_t *first_va_range_to_migrate = NULL;
|
||||
uvm_va_range_managed_t *managed_range = NULL;
|
||||
uvm_va_range_managed_t *first_managed_range_to_migrate = NULL;
|
||||
struct mm_struct *mm;
|
||||
uvm_processor_id_t preferred_location_id;
|
||||
int preferred_cpu_nid = NUMA_NO_NODE;
|
||||
@ -386,27 +393,30 @@ NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS
|
||||
length,
|
||||
preferred_location_id,
|
||||
preferred_cpu_nid,
|
||||
&first_va_range_to_migrate,
|
||||
&first_managed_range_to_migrate,
|
||||
&local_tracker);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// No VA range to migrate, early exit
|
||||
if (!first_va_range_to_migrate)
|
||||
// No managed range to migrate, early exit
|
||||
if (!first_managed_range_to_migrate)
|
||||
goto done;
|
||||
|
||||
uvm_va_space_downgrade_write(va_space);
|
||||
has_va_space_write_lock = false;
|
||||
|
||||
// No need to check for holes in the VA ranges span here, this was checked by preferred_location_set
|
||||
for (va_range = first_va_range_to_migrate; va_range; va_range = uvm_va_space_iter_next(va_range, end)) {
|
||||
// No need to check for holes in the managed ranges span here, this was
|
||||
// checked by preferred_location_set
|
||||
for (managed_range = first_managed_range_to_migrate;
|
||||
managed_range;
|
||||
managed_range = uvm_va_space_iter_managed_next(managed_range, end)) {
|
||||
uvm_range_group_range_iter_t iter;
|
||||
NvU64 cur_start = max(start, va_range->node.start);
|
||||
NvU64 cur_end = min(end, va_range->node.end);
|
||||
NvU64 cur_start = max(start, managed_range->va_range.node.start);
|
||||
NvU64 cur_end = min(end, managed_range->va_range.node.end);
|
||||
|
||||
uvm_range_group_for_each_migratability_in(&iter, va_space, cur_start, cur_end) {
|
||||
if (!iter.migratable) {
|
||||
status = uvm_range_group_va_range_migrate(va_range, iter.start, iter.end, &local_tracker);
|
||||
status = uvm_range_group_va_range_migrate(managed_range, iter.start, iter.end, &local_tracker);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
@ -504,7 +514,7 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
|
||||
NV_STATUS status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
uvm_va_policy_t *policy = &va_block->managed_range->policy;
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
|
||||
@ -604,29 +614,29 @@ static NV_STATUS accessed_by_set(uvm_va_space_t *va_space,
|
||||
goto done;
|
||||
|
||||
if (type == UVM_API_RANGE_TYPE_MANAGED) {
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_t *va_range_last = NULL;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_va_range_managed_t *managed_range_last = NULL;
|
||||
|
||||
uvm_for_each_managed_va_range_in_contig(va_range, va_space, base, last_address) {
|
||||
va_range_last = va_range;
|
||||
uvm_for_each_va_range_managed_in_contig(managed_range, va_space, base, last_address) {
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// If we didn't split the ends, check that they match
|
||||
if (va_range->node.start < base || va_range->node.end > last_address)
|
||||
UVM_ASSERT(uvm_processor_mask_test(&uvm_va_range_get_policy(va_range)->accessed_by,
|
||||
if (managed_range->va_range.node.start < base || managed_range->va_range.node.end > last_address)
|
||||
UVM_ASSERT(uvm_processor_mask_test(&managed_range->policy.accessed_by,
|
||||
processor_id) == set_bit);
|
||||
|
||||
if (set_bit) {
|
||||
status = uvm_va_range_set_accessed_by(va_range, processor_id, mm, &local_tracker);
|
||||
status = uvm_va_range_set_accessed_by(managed_range, processor_id, mm, &local_tracker);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
else {
|
||||
uvm_va_range_unset_accessed_by(va_range, processor_id, &local_tracker);
|
||||
uvm_va_range_unset_accessed_by(managed_range, processor_id, &local_tracker);
|
||||
}
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_range_last);
|
||||
UVM_ASSERT(va_range_last->node.end >= last_address);
|
||||
UVM_ASSERT(managed_range_last);
|
||||
UVM_ASSERT(managed_range_last->va_range.node.end >= last_address);
|
||||
}
|
||||
else {
|
||||
// NULL mm case already filtered by uvm_api_range_type_check()
|
||||
@ -672,7 +682,7 @@ static NV_STATUS va_block_set_read_duplication_locked(uvm_va_block_t *va_block,
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
// Force CPU page residency to be on the preferred NUMA node.
|
||||
va_block_context->make_resident.dest_nid = uvm_va_range_get_policy(va_block->va_range)->preferred_nid;
|
||||
va_block_context->make_resident.dest_nid = va_block->managed_range->policy.preferred_nid;
|
||||
|
||||
for_each_id_in_mask(src_id, &va_block->resident) {
|
||||
NV_STATUS status;
|
||||
@ -721,7 +731,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
uvm_processor_id_t processor_id;
|
||||
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
|
||||
const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
const uvm_va_policy_t *policy = &va_block->managed_range->policy;
|
||||
uvm_processor_id_t preferred_location = policy->preferred_location;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@ -863,15 +873,15 @@ static NV_STATUS read_duplication_set(uvm_va_space_t *va_space, NvU64 base, NvU6
|
||||
goto done;
|
||||
|
||||
if (type == UVM_API_RANGE_TYPE_MANAGED) {
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_t *va_range_last = NULL;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_va_range_managed_t *managed_range_last = NULL;
|
||||
|
||||
uvm_for_each_managed_va_range_in_contig(va_range, va_space, base, last_address) {
|
||||
va_range_last = va_range;
|
||||
uvm_for_each_va_range_managed_in_contig(managed_range, va_space, base, last_address) {
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// If we didn't split the ends, check that they match
|
||||
if (va_range->node.start < base || va_range->node.end > last_address)
|
||||
UVM_ASSERT(uvm_va_range_get_policy(va_range)->read_duplication == new_policy);
|
||||
if (managed_range->va_range.node.start < base || managed_range->va_range.node.end > last_address)
|
||||
UVM_ASSERT(managed_range->policy.read_duplication == new_policy);
|
||||
|
||||
// If the va_space cannot currently read duplicate, only change the user
|
||||
// state. All memory should already have read duplication unset.
|
||||
@ -879,22 +889,22 @@ static NV_STATUS read_duplication_set(uvm_va_space_t *va_space, NvU64 base, NvU6
|
||||
|
||||
// Handle SetAccessedBy mappings
|
||||
if (new_policy == UVM_READ_DUPLICATION_ENABLED) {
|
||||
status = uvm_va_range_set_read_duplication(va_range, mm);
|
||||
status = uvm_va_range_set_read_duplication(managed_range, mm);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
else {
|
||||
// If unsetting read duplication fails, the return status is
|
||||
// not propagated back to the caller
|
||||
(void)uvm_va_range_unset_read_duplication(va_range, mm);
|
||||
(void)uvm_va_range_unset_read_duplication(managed_range, mm);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_range_get_policy(va_range)->read_duplication = new_policy;
|
||||
managed_range->policy.read_duplication = new_policy;
|
||||
}
|
||||
|
||||
UVM_ASSERT(va_range_last);
|
||||
UVM_ASSERT(va_range_last->node.end >= last_address);
|
||||
UVM_ASSERT(managed_range_last);
|
||||
UVM_ASSERT(managed_range_last->va_range.node.end >= last_address);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(type == UVM_API_RANGE_TYPE_HMM);
|
||||
@ -947,19 +957,16 @@ static NV_STATUS system_wide_atomics_set(uvm_va_space_t *va_space, const NvProce
|
||||
|
||||
already_enabled = uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id);
|
||||
if (enable && !already_enabled) {
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_range_managed_t *managed_range;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, NULL);
|
||||
NV_STATUS tracker_status;
|
||||
|
||||
// Revoke atomic mappings from the calling GPU
|
||||
uvm_for_each_va_range(va_range, va_space) {
|
||||
uvm_for_each_va_range_managed(managed_range, va_space) {
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
continue;
|
||||
|
||||
for_each_va_block_in_va_range(va_range, va_block) {
|
||||
for_each_va_block_in_va_range(managed_range, va_block) {
|
||||
uvm_page_mask_t *non_resident_pages = &va_block_context->caller_page_mask;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
@ -23,6 +23,8 @@
|
||||
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_processors.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
|
||||
static struct kmem_cache *g_uvm_processor_mask_cache __read_mostly;
|
||||
const uvm_processor_mask_t g_uvm_processor_mask_cpu = { .bitmap = { 1 << UVM_PARENT_ID_CPU_VALUE }};
|
||||
@ -107,3 +109,31 @@ bool uvm_numa_id_eq(int nid0, int nid1)
|
||||
|
||||
return nid0 == nid1;
|
||||
}
|
||||
|
||||
const char *uvm_processor_get_name(uvm_processor_id_t id)
|
||||
{
|
||||
if (UVM_ID_IS_CPU(id))
|
||||
return "0: CPU";
|
||||
else
|
||||
return uvm_gpu_name(uvm_gpu_get(id));
|
||||
}
|
||||
|
||||
void uvm_processor_get_uuid(uvm_processor_id_t id, NvProcessorUuid *out_uuid)
|
||||
{
|
||||
if (UVM_ID_IS_CPU(id)) {
|
||||
memcpy(out_uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*out_uuid));
|
||||
}
|
||||
else {
|
||||
uvm_gpu_t *gpu = uvm_gpu_get(id);
|
||||
UVM_ASSERT(gpu);
|
||||
memcpy(out_uuid, &gpu->uuid, sizeof(*out_uuid));
|
||||
}
|
||||
}
|
||||
|
||||
bool uvm_processor_has_memory(uvm_processor_id_t id)
|
||||
{
|
||||
if (UVM_ID_IS_CPU(id))
|
||||
return true;
|
||||
|
||||
return uvm_gpu_get(id)->mem_info.size > 0;
|
||||
}
|
||||
|
@ -176,6 +176,13 @@ static void prefix_fn_mask##_copy(mask_t *dst, const mask_t *src)
|
||||
bitmap_copy(dst->bitmap, src->bitmap, (maxval)); \
|
||||
} \
|
||||
\
|
||||
static void prefix_fn_mask##_range_fill(mask_t *mask, proc_id_t start, NvU32 nbits) \
|
||||
{ \
|
||||
UVM_ASSERT_MSG(start.val + nbits <= (maxval), "start %u nbits %u\n", start.val, nbits); \
|
||||
\
|
||||
bitmap_set(mask->bitmap, start.val, nbits); \
|
||||
} \
|
||||
\
|
||||
static bool prefix_fn_mask##_and(mask_t *dst, const mask_t *src1, const mask_t *src2) \
|
||||
{ \
|
||||
return bitmap_and(dst->bitmap, src1->bitmap, src2->bitmap, (maxval)) != 0; \
|
||||
@ -276,6 +283,12 @@ typedef uvm_processor_id_t uvm_gpu_id_t;
|
||||
// Maximum number of GPUs/processors that can be represented with the id types
|
||||
#define UVM_PARENT_ID_MAX_GPUS NV_MAX_DEVICES
|
||||
#define UVM_PARENT_ID_MAX_PROCESSORS (UVM_PARENT_ID_MAX_GPUS + 1)
|
||||
#define UVM_MAX_UNIQUE_PARENT_GPU_PAIRS SUM_FROM_0_TO_N(UVM_PARENT_ID_MAX_GPUS - 1)
|
||||
|
||||
// Note that this is the number of MIG instance pairs between two different
|
||||
// parent GPUs so parent A sub-processor ID 0 to parent B sub-processor ID 0
|
||||
// is valid.
|
||||
#define UVM_MAX_UNIQUE_SUB_PROCESSOR_PAIRS SUM_FROM_0_TO_N(UVM_PARENT_ID_MAX_SUB_PROCESSORS)
|
||||
|
||||
#define UVM_ID_MAX_GPUS (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
|
||||
#define UVM_ID_MAX_PROCESSORS (UVM_ID_MAX_GPUS + 1)
|
||||
@ -711,4 +724,16 @@ void uvm_processor_mask_cache_exit(void);
|
||||
uvm_processor_mask_t *uvm_processor_mask_cache_alloc(void);
|
||||
void uvm_processor_mask_cache_free(uvm_processor_mask_t *mask);
|
||||
|
||||
// Return the name of the given processor ID.
|
||||
// Locking: This should only be called when the ID is the CPU or the GPU is
|
||||
// retained (such as the va_space lock being held).
|
||||
const char *uvm_processor_get_name(uvm_processor_id_t id);
|
||||
|
||||
// Return the UUID in 'out_uuid' for the given processor ID 'id'.
|
||||
// Locking: This should only be called when the ID is the CPU or the GPU is
|
||||
// retained (such as the va_space lock being held).
|
||||
void uvm_processor_get_uuid(uvm_processor_id_t id, NvProcessorUuid *out_uuid);
|
||||
|
||||
bool uvm_processor_has_memory(uvm_processor_id_t id);
|
||||
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user