530.30.02

commit 4397463e73
parent e598191e8e
@@ -1,5 +1,13 @@
# Changelog

## Release 530 Entries

### [530.30.02] 2023-02-28

#### Fixed

- Add support for resizable BAR on Linux when NVreg_EnableResizableBar=1 module param is set. [#3](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/3) by @sjkelly

## Release 525 Entries

### [525.89.02] 2023-02-08
@@ -18,6 +26,10 @@

### [525.60.11] 2022-11-28

#### Fixed

- Fixed nvenc compatibility with usermode clients [#104](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/104)

### [525.53] 2022-11-10

#### Changed
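As an editorial aside on the new CHANGELOG entry above (not part of the diff): the resizable BAR support it describes is opt-in via the NVreg_EnableResizableBar module parameter, so a minimal, hypothetical way to enable it is a standard modprobe options line. The configuration file name below is arbitrary.

```sh
# Hypothetical sketch, not taken from this commit: persistently set the
# NVreg_EnableResizableBar parameter named in the CHANGELOG entry above.
echo "options nvidia NVreg_EnableResizableBar=1" | sudo tee /etc/modprobe.d/nvidia-rebar.conf
# Or only for the next manual load of the module:
# modprobe nvidia NVreg_EnableResizableBar=1
```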
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 525.89.02.
version 530.30.02.

## How to Build
@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
525.89.02 driver release. This can be achieved by installing
530.30.02 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
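The "E.g.," above is cut off by the hunk boundary. Purely as an illustration (the installer file name is an assumption, not taken from this commit), the invocation it refers to would look something like:

```sh
# Hypothetical example: install only the user-space driver components from the
# .run package, so they pair with the open kernel modules built from this tree.
sh ./NVIDIA-Linux-x86_64-530.30.02.run --no-kernel-modules
```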
@@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 525.89.02 release,
(see the table below). However, in the 530.30.02 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/525.89.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/530.30.02/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
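A minimal sketch of the opt-in described in the README text above (illustrative only, not part of the diff; the configuration file name is arbitrary):

```sh
# Hypothetical sketch: enable use of the open kernel modules on GeForce and
# Workstation GPUs by setting the parameter named in the README text above.
echo "options nvidia NVreg_OpenRmEnableUnsupportedGpus=1" | sudo tee /etc/modprobe.d/nvidia-open-gpus.conf
```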
@@ -70,9 +70,13 @@ $(foreach _module, $(NV_KERNEL_MODULES), \

EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"525.89.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"530.30.02\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
endif

EXTRA_CFLAGS += -Wno-unused-function

@@ -259,6 +263,7 @@ NV_HEADER_PRESENCE_TESTS = \
  linux/platform/tegra/dce/dce-client-ipc.h \
  linux/nvhost.h \
  linux/nvhost_t194.h \
  linux/host1x-next.h \
  asm/book3s/64/hash-64k.h \
  asm/set_memory.h \
  asm/prom.h \
@@ -81,12 +81,12 @@ static inline const char *nv_firmware_path(
{
    switch (fw_chip_family)
    {
        case NV_FIRMWARE_CHIP_FAMILY_AD10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ad10x.bin");
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ga10x.bin");

        case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_TU10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");
@@ -100,12 +100,12 @@ static inline const char *nv_firmware_path(
{
    switch (fw_chip_family)
    {
        case NV_FIRMWARE_CHIP_FAMILY_AD10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ad10x.bin");
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ga10x.bin");

        case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
        case NV_FIRMWARE_CHIP_FAMILY_TU10X:
            return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");
@@ -125,7 +125,7 @@ static inline const char *nv_firmware_path(
// which will then be invoked (at the top-level) for each
// gsp_*.bin (but not gsp_log_*.bin)
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ad10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ga10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
#endif // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
@@ -90,30 +90,6 @@ typedef enum VGPU_DEVICE_STATE_E
    NV_VGPU_DEV_IN_USE = 2
} VGPU_DEVICE_STATE;

typedef enum _VMBUS_CMD_TYPE
{
    VMBUS_CMD_TYPE_INVALID = 0,
    VMBUS_CMD_TYPE_SETUP = 1,
    VMBUS_CMD_TYPE_SENDPACKET = 2,
    VMBUS_CMD_TYPE_CLEANUP = 3,
} VMBUS_CMD_TYPE;

typedef struct
{
    NvU32 request_id;
    NvU32 page_count;
    NvU64 *pPfns;
    void *buffer;
    NvU32 bufferlen;
} vmbus_send_packet_cmd_params;

typedef struct
{
    NvU32 override_sint;
    NvU8 *nv_guid;
} vmbus_setup_cmd_params;

/*
 * Function prototypes
 */
@@ -104,7 +104,7 @@ typedef struct nv_ioctl_rm_api_version

#define NV_RM_API_VERSION_CMD_STRICT 0
#define NV_RM_API_VERSION_CMD_RELAXED '1'
#define NV_RM_API_VERSION_CMD_OVERRIDE '2'
#define NV_RM_API_VERSION_CMD_QUERY '2'

#define NV_RM_API_VERSION_REPLY_UNRECOGNIZED 0
#define NV_RM_API_VERSION_REPLY_RECOGNIZED 1
@@ -633,6 +633,26 @@ static NvBool nv_numa_node_has_memory(int node_id)
        free_pages(ptr, order); \
    }

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
    /*
     * When AMD memory encryption is enabled, device memory mappings with the
     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
     *
     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
     */
#if defined(NV_CC_MKDEC_PRESENT)
    prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
#else
    prot = pgprot_decrypted(prot);
#endif
#endif

    return prot;
}

#if defined(PAGE_KERNEL_NOENC)
#if defined(__pgprot_mask)
#define NV_PAGE_KERNEL_NOCACHE_NOENC __pgprot_mask(__PAGE_KERNEL_NOCACHE)
@@ -654,7 +674,8 @@ static inline NvUPtr nv_vmap(struct page **pages, NvU32 page_count,

#if defined(PAGE_KERNEL_NOENC)
    if (unencrypted)
    {
        prot = cached ? PAGE_KERNEL_NOENC : NV_PAGE_KERNEL_NOCACHE_NOENC;
        prot = cached ? nv_adjust_pgprot(PAGE_KERNEL_NOENC, 0) :
                        nv_adjust_pgprot(NV_PAGE_KERNEL_NOCACHE_NOENC, 0);
    }
    else
#endif
@@ -939,26 +960,6 @@ static inline int nv_remap_page_range(struct vm_area_struct *vma,
    return ret;
}

static inline pgprot_t nv_adjust_pgprot(pgprot_t vm_prot, NvU32 extra)
{
    pgprot_t prot = __pgprot(pgprot_val(vm_prot) | extra);
#if defined(CONFIG_AMD_MEM_ENCRYPT) && defined(NV_PGPROT_DECRYPTED_PRESENT)
    /*
     * When AMD memory encryption is enabled, device memory mappings with the
     * C-bit set read as 0xFF, so ensure the bit is cleared for user mappings.
     *
     * If cc_mkdec() is present, then pgprot_decrypted() can't be used.
     */
#if defined(NV_CC_MKDEC_PRESENT)
    prot = __pgprot(__sme_clr(pgprot_val(vm_prot)));
#else
    prot = pgprot_decrypted(prot);
#endif
#endif

    return prot;
}

static inline int nv_io_remap_page_range(struct vm_area_struct *vma,
                                         NvU64 phys_addr, NvU64 size, NvU32 extra_prot)
{
@@ -1182,7 +1183,7 @@ typedef struct nv_alloc_s {
    NvBool zeroed : 1;
    NvBool aliased : 1;
    NvBool user : 1;
    NvBool node0 : 1;
    NvBool node : 1;
    NvBool peer_io : 1;
    NvBool physical : 1;
    NvBool unencrypted : 1;
@@ -1196,6 +1197,7 @@ typedef struct nv_alloc_s {
    unsigned int pid;
    struct page **user_pages;
    NvU64 guest_id;  /* id of guest VM */
    NvS32 node_id;   /* Node id for memory allocation when node is set in flags */
    void *import_priv;
    struct sg_table *import_sgt;
} nv_alloc_t;
@ -1436,6 +1438,24 @@ struct nv_dma_device {
|
||||
NvBool nvlink;
|
||||
};
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
void *notifier_data;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
#endif
|
||||
|
||||
/* linux-specific version of old nv_state_t */
|
||||
/* this is a general os-specific state structure. the first element *must* be
|
||||
the general state structure, for the generic unix-based code */
|
||||
@ -1530,8 +1550,13 @@ typedef struct nv_linux_state_s {
|
||||
/* Per-device notifier block for ACPI events */
|
||||
struct notifier_block acpi_nb;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
nv_acpi_t* nv_acpi_object;
|
||||
#endif
|
||||
|
||||
/* Lock serializing ISRs for different SOC vectors */
|
||||
nv_spinlock_t soc_isr_lock;
|
||||
void *soc_bh_mutex;
|
||||
|
||||
struct nv_timer snapshot_timer;
|
||||
nv_spinlock_t snapshot_timer_lock;
|
||||
@ -1577,24 +1602,6 @@ extern struct rw_semaphore nv_system_pm_lock;
|
||||
|
||||
extern NvBool nv_ats_supported;
|
||||
|
||||
#if defined(NV_LINUX_ACPI_EVENTS_SUPPORTED)
|
||||
/*
|
||||
* acpi data storage structure
|
||||
*
|
||||
* This structure retains the pointer to the device,
|
||||
* and any other baggage we want to carry along
|
||||
*
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
nvidia_stack_t *sp;
|
||||
struct acpi_device *device;
|
||||
struct acpi_handle *handle;
|
||||
int notify_handler_installed;
|
||||
} nv_acpi_t;
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* file-private data
|
||||
* hide a pointer to our data structures in a file-private ptr
|
||||
@ -1744,6 +1751,7 @@ static inline NV_STATUS nv_check_gpu_state(nv_state_t *nv)
|
||||
|
||||
extern NvU32 NVreg_EnableUserNUMAManagement;
|
||||
extern NvU32 NVreg_RegisterPCIDriver;
|
||||
extern NvU32 NVreg_EnableResizableBar;
|
||||
|
||||
extern NvU32 num_probed_nv_devices;
|
||||
extern NvU32 num_nv_devices;
|
||||
|
@ -27,6 +27,9 @@
|
||||
#include <linux/pci.h>
|
||||
#include "nv-linux.h"
|
||||
|
||||
#define NV_GPU_BAR1 1
|
||||
#define NV_GPU_BAR3 3
|
||||
|
||||
int nv_pci_register_driver(void);
|
||||
void nv_pci_unregister_driver(void);
|
||||
int nv_pci_count_devices(void);
|
||||
|
@ -315,6 +315,7 @@ typedef enum
|
||||
NV_SOC_IRQ_DPAUX_TYPE,
|
||||
NV_SOC_IRQ_GPIO_TYPE,
|
||||
NV_SOC_IRQ_HDACODEC_TYPE,
|
||||
NV_SOC_IRQ_TCPC2DISP_TYPE,
|
||||
NV_SOC_IRQ_INVALID_TYPE
|
||||
} nv_soc_irq_type_t;
|
||||
|
||||
@ -329,6 +330,7 @@ typedef struct nv_soc_irq_info_s {
|
||||
NvU32 gpio_num;
|
||||
NvU32 dpaux_instance;
|
||||
} irq_data;
|
||||
NvS32 ref_count;
|
||||
} nv_soc_irq_info_t;
|
||||
|
||||
#define NV_MAX_SOC_IRQS 6
|
||||
@ -384,9 +386,11 @@ typedef struct nv_state_t
|
||||
NvS32 current_soc_irq;
|
||||
NvU32 num_soc_irqs;
|
||||
NvU32 hdacodec_irq;
|
||||
NvU32 tcpc2disp_irq;
|
||||
NvU8 *soc_dcb_blob;
|
||||
NvU32 soc_dcb_size;
|
||||
NvU32 disp_sw_soc_chip_id;
|
||||
NvBool soc_is_dpalt_mode_supported;
|
||||
|
||||
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
|
||||
NvU32 igpu_nonstall_irq;
|
||||
@ -649,7 +653,8 @@ static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
|
||||
return ((nv->fb) && (nv->fb->size != 0) &&
|
||||
(offset >= nv->fb->cpu_address) &&
|
||||
((offset + (length - 1)) >= offset) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
@ -739,7 +744,7 @@ nv_state_t* NV_API_CALL nv_get_ctl_state (void);
|
||||
void NV_API_CALL nv_set_dma_address_size (nv_state_t *, NvU32 );
|
||||
|
||||
NV_STATUS NV_API_CALL nv_alias_pages (nv_state_t *, NvU32, NvU32, NvU32, NvU64, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_alloc_pages (nv_state_t *, NvU32, NvBool, NvU32, NvBool, NvBool, NvS32, NvU64 *, void **);
|
||||
NV_STATUS NV_API_CALL nv_free_pages (nv_state_t *, NvU32, NvBool, NvU32, void *);
|
||||
|
||||
NV_STATUS NV_API_CALL nv_register_user_pages (nv_state_t *, NvU64, NvU64 *, void *, void **);
|
||||
@ -915,7 +920,6 @@ NV_STATUS NV_API_CALL rm_write_registry_string (nvidia_stack_t *, nv_state_t *
|
||||
void NV_API_CALL rm_parse_option_string (nvidia_stack_t *, const char *);
|
||||
char* NV_API_CALL rm_remove_spaces (const char *);
|
||||
char* NV_API_CALL rm_string_token (char **, const char);
|
||||
void NV_API_CALL rm_vgpu_vfio_set_driver_vm(nvidia_stack_t *, NvBool);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_run_rc_callback (nvidia_stack_t *, nv_state_t *);
|
||||
void NV_API_CALL rm_execute_work_item (nvidia_stack_t *, void *);
|
||||
@ -985,11 +989,12 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
void NV_API_CALL rm_acpi_nvpcf_notify(nvidia_stack_t *);
|
||||
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
|
@ -921,6 +921,23 @@ NV_STATUS nvUvmInterfaceGetNonReplayableFaults(UvmGpuFaultInfo *pFaultInfo,
|
||||
void *pFaultBuffer,
|
||||
NvU32 *numFaults);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceFlushReplayableFaultBuffer
|
||||
|
||||
This function sends an RPC to GSP in order to flush the HW replayable fault buffer.
|
||||
|
||||
NOTES:
|
||||
- This function DOES NOT acquire the RM API or GPU locks. That is because
|
||||
it is called during fault servicing, which could produce deadlocks.
|
||||
|
||||
Arguments:
|
||||
device[IN] - Device handle associated with the gpu
|
||||
|
||||
Error codes:
|
||||
NV_ERR_INVALID_ARGUMENT
|
||||
*/
|
||||
NV_STATUS nvUvmInterfaceFlushReplayableFaultBuffer(uvmGpuDeviceHandle device);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfaceInitAccessCntrInfo
|
||||
|
||||
@ -1054,11 +1071,13 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
|
||||
hMemory[IN] - Memory handle.
|
||||
offset [IN] - Offset from the beginning of the allocation
|
||||
where PTE mappings should begin.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size [IN] - Length of the allocation for which PTEs
|
||||
should be built.
|
||||
Should be aligned with pagesize associated
|
||||
Should be aligned with mappingPagesize
|
||||
in gpuExternalMappingInfo associated
|
||||
with the allocation.
|
||||
size = 0 will be interpreted as the total size
|
||||
of the allocation.
|
||||
|
@ -110,7 +110,7 @@ typedef struct UvmGpuMemoryInfo_tag
|
||||
NvBool deviceDescendant;
|
||||
|
||||
// Out: Page size associated with the phys alloc.
|
||||
NvU32 pageSize;
|
||||
NvU64 pageSize;
|
||||
|
||||
// Out: Set to TRUE, if the allocation is contiguous.
|
||||
NvBool contig;
|
||||
@ -306,6 +306,7 @@ typedef struct UvmGpuChannelAllocParams_tag
|
||||
|
||||
// interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE
|
||||
NvU32 engineType;
|
||||
|
||||
} UvmGpuChannelAllocParams;
|
||||
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag
|
||||
@ -371,7 +372,6 @@ typedef enum
|
||||
UVM_LINK_TYPE_NVLINK_2,
|
||||
UVM_LINK_TYPE_NVLINK_3,
|
||||
UVM_LINK_TYPE_NVLINK_4,
|
||||
UVM_LINK_TYPE_C2C,
|
||||
} UVM_LINK_TYPE;
|
||||
|
||||
typedef struct UvmGpuCaps_tag
|
||||
@ -409,7 +409,7 @@ typedef struct UvmGpuCaps_tag
|
||||
|
||||
typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
{
|
||||
NvU32 bigPageSize;
|
||||
NvU64 bigPageSize;
|
||||
|
||||
NvBool atsEnabled;
|
||||
|
||||
@ -430,7 +430,7 @@ typedef struct UvmGpuAddressSpaceInfo_tag
|
||||
typedef struct UvmGpuAllocInfo_tag
|
||||
{
|
||||
NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested
|
||||
NvU32 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M
|
||||
NvU64 alignment; // Virtual alignment
|
||||
NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation
|
||||
NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB
|
||||
@ -516,6 +516,13 @@ typedef struct UvmGpuExternalMappingInfo_tag
|
||||
// In: Size of the buffer to store PTEs (in bytes).
|
||||
NvU64 pteBufferSize;
|
||||
|
||||
// In: Page size for mapping
|
||||
// If this field is passed as 0, the page size
|
||||
// of the allocation is used for mapping.
|
||||
// nvUvmInterfaceGetExternalAllocPtes must pass
|
||||
// this field as zero.
|
||||
NvU64 mappingPageSize;
|
||||
|
||||
// In: Pointer to a buffer to store PTEs.
|
||||
// Out: The interface will fill the buffer with PTEs
|
||||
NvU64 *pteBuffer;
|
||||
@ -826,6 +833,7 @@ typedef struct UvmGpuFaultInfo_tag
|
||||
|
||||
// Preallocated stack for functions called from the UVM isr bottom half
|
||||
void *isr_bh_sp;
|
||||
|
||||
} nonReplayable;
|
||||
NvHandle faultBufferHandle;
|
||||
} UvmGpuFaultInfo;
|
||||
|
@ -520,14 +520,23 @@ struct NvKmsKapiFunctionsTable {
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke modeset permissions previously granted. This currently applies for all
|
||||
* previous grant requests for this device.
|
||||
* Revoke permissions previously granted. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to revoke.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)(struct NvKmsKapiDevice *device);
|
||||
NvBool (*revokePermissions)
|
||||
(
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
|
@ -181,7 +181,6 @@ NV_STATUS NV_API_CALL os_put_page (NvU64 address);
|
||||
NvU32 NV_API_CALL os_get_page_refcount (NvU64 address);
|
||||
NvU32 NV_API_CALL os_count_tail_pages (NvU64 address);
|
||||
void NV_API_CALL os_free_pages_phys (NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_call_nv_vmbus (NvU32, void *);
|
||||
NV_STATUS NV_API_CALL os_open_temporary_file (void **);
|
||||
void NV_API_CALL os_close_file (void *);
|
||||
NV_STATUS NV_API_CALL os_write_file (void *, NvU8 *, NvU64, NvU64);
|
||||
|
@ -74,6 +74,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_own_page_fault_intr(nvidia_stack_t *, nvgpuDevi
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_fault_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuFaultInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_get_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, void *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_flush_replayable_fault_buffer(nvidia_stack_t *, nvgpuDeviceHandle_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack_t *, nvgpuFaultInfo_t, NvBool *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
|
||||
|
@ -8,19 +8,11 @@ cd $SCRIPTDIR
|
||||
|
||||
CC="$1"
|
||||
ARCH=$2
|
||||
ISYSTEM=`$CC -print-file-name=include 2> /dev/null`
|
||||
SOURCES=$3
|
||||
HEADERS=$SOURCES/include
|
||||
OUTPUT=$4
|
||||
XEN_PRESENT=1
|
||||
PREEMPT_RT_PRESENT=0
|
||||
KERNEL_ARCH="$ARCH"
|
||||
|
||||
if [ "$ARCH" = "i386" -o "$ARCH" = "x86_64" ]; then
|
||||
if [ -d "$SOURCES/arch/x86" ]; then
|
||||
KERNEL_ARCH="x86"
|
||||
fi
|
||||
fi
|
||||
|
||||
# VGX_BUILD parameter defined only for VGX builds (vGPU Host driver)
|
||||
# VGX_KVM_BUILD parameter defined only vGPU builds on KVM hypervisor
|
||||
@ -69,16 +61,10 @@ test_header_presence() {
|
||||
# NV_LINUX_FENCE_H_PRESENT, and that is either defined or undefined, in the
|
||||
# output (which goes to stdout, just like the rest of this file).
|
||||
|
||||
# -MG or -MD can interfere with the use of -M and -M -MG for testing file
|
||||
# existence; filter out any occurrences from CFLAGS. CFLAGS is intentionally
|
||||
# wrapped with whitespace in the input to sed(1) so the regex can match zero
|
||||
# or more occurrences of "-MD" or "-MG", surrounded by whitespace to avoid
|
||||
# accidental matches with tokens that happen to contain either of those
|
||||
# strings, without special handling of the beginning or the end of the line.
|
||||
TEST_CFLAGS=`echo "-E -M $CFLAGS " | sed -e 's/\( -M[DG]\)* / /g'`
|
||||
TEST_CFLAGS="-E -M $CFLAGS"
|
||||
|
||||
file="$1"
|
||||
file_define=NV_`echo $file | tr '/.' '_' | tr '-' '_' | tr 'a-z' 'A-Z'`_PRESENT
|
||||
file_define=NV_`echo $file | tr '/.\-a-z' '___A-Z'`_PRESENT
|
||||
|
||||
CODE="#include <$file>"
|
||||
|
||||
@ -99,6 +85,7 @@ test_header_presence() {
|
||||
}
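A rough standalone sketch of what test_header_presence() above effectively does for a single header (illustrative only, not part of the diff; it assumes CC and TEST_CFLAGS are set as in the surrounding script, and the header name is just a sample):

```sh
# Hypothetical sketch: probe whether <linux/fence.h> is available in the kernel
# headers by asking the compiler for a dependency list of a one-line include,
# then emit the corresponding NV_*_PRESENT define the way conftest.sh does.
echo "#include <linux/fence.h>" > conftest_probe.c
if $CC $TEST_CFLAGS conftest_probe.c > /dev/null 2>&1; then
    echo "#define NV_LINUX_FENCE_H_PRESENT"
else
    echo "#undef NV_LINUX_FENCE_H_PRESENT"
fi
rm -f conftest_probe.c
```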
|
||||
|
||||
build_cflags() {
|
||||
ISYSTEM=`$CC -print-file-name=include 2> /dev/null`
|
||||
BASE_CFLAGS="-O2 -D__KERNEL__ \
|
||||
-DKBUILD_BASENAME=\"#conftest$$\" -DKBUILD_MODNAME=\"#conftest$$\" \
|
||||
-nostdinc -isystem $ISYSTEM \
|
||||
@ -125,6 +112,14 @@ build_cflags() {
|
||||
MACH_CFLAGS="-I$HEADERS/asm/mach-xen"
|
||||
fi
|
||||
|
||||
KERNEL_ARCH="$ARCH"
|
||||
|
||||
if [ "$ARCH" = "i386" -o "$ARCH" = "x86_64" ]; then
|
||||
if [ -d "$SOURCES/arch/x86" ]; then
|
||||
KERNEL_ARCH="x86"
|
||||
fi
|
||||
fi
|
||||
|
||||
SOURCE_HEADERS="$HEADERS"
|
||||
SOURCE_ARCH_HEADERS="$SOURCES/arch/$KERNEL_ARCH/include"
|
||||
OUTPUT_HEADERS="$OUTPUT/include"
|
||||
@ -764,24 +759,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VFIO_INFO_ADD_CAPABILITY_HAS_CAP_TYPE_ID_ARGS" "" "types"
|
||||
;;
|
||||
|
||||
vmbus_channel_has_ringbuffer_page)
|
||||
#
|
||||
# Check if ringbuffer_page field exist in vmbus_channel structure
|
||||
#
|
||||
# Changed in commit 52a42c2a90226dc61c99bbd0cb096deeb52c334b
|
||||
# ("vmbus: keep pointer to ring buffer page") in v5.0 (2018-09-14)
|
||||
#
|
||||
|
||||
CODE="
|
||||
#include <linux/hyperv.h>
|
||||
|
||||
int conftest_vmbus_channel_has_ringbuffer_page(void) {
|
||||
return offsetof(struct vmbus_channel, ringbuffer_page);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VMBUS_CHANNEL_HAS_RING_BUFFER_PAGE" "" "types"
|
||||
;;
|
||||
|
||||
nvidia_grid_build)
|
||||
if [ -n "$GRID_BUILD" ]; then
|
||||
echo "#define NV_GRID_BUILD" | append_conftest "generic"
|
||||
@ -2691,6 +2668,24 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
enable_apicv)
|
||||
#
|
||||
# Determine if enable_apicv boolean is exported by kernel.
|
||||
#
|
||||
# Added by commit fdf513e37a3bd ("KVM: x86: Use common 'enable_apicv'
|
||||
# variable for both APICv and AVIC")
|
||||
#
|
||||
CODE="
|
||||
$CONFTEST_PREAMBLE
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
bool is_enable_apicv_present() {
|
||||
return enable_apicv;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ENABLE_APICV_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
pci_driver_has_driver_managed_dma)
|
||||
#
|
||||
# Determine if "struct pci_driver" has .driver_managed_dma member.
|
||||
@ -2774,6 +2769,63 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_master_has_leases)
|
||||
#
|
||||
# Determine if drm_master has 'leases', 'lessor', 'lessee_idr' fields.
|
||||
# Also checks for struct drm_mode_revoke_lease.
|
||||
#
|
||||
# Added by commits 2ed077e467ee ("drm: Add drm_object lease infrastructure [v5]")
|
||||
# and 62884cd386b8 ("drm: Add four ioctls for managing drm mode object leases [v7]")
|
||||
# in v4.15 (2017-10-24)
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
|
||||
int conftest_drm_master_leases(void) {
|
||||
return offsetof(struct drm_master, leases);
|
||||
}
|
||||
int conftest_drm_master_lessor(void) {
|
||||
return offsetof(struct drm_master, lessor);
|
||||
}
|
||||
int conftest_drm_master_lessee_idr(void) {
|
||||
return offsetof(struct drm_master, lessee_idr);
|
||||
}
|
||||
int conftest_drm_mode_revoke_lease(void) {
|
||||
return offsetof(struct drm_mode_revoke_lease, lessee_id);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_MASTER_HAS_LEASES" "" "types"
|
||||
;;
|
||||
|
||||
drm_file_get_master)
|
||||
#
|
||||
# Determine if function drm_file_get_master() is present.
|
||||
#
|
||||
# Added by commit 56f0729a510f ("drm: protect drm_master pointers in drm_lease.c")
|
||||
# in v5.15 (2021-07-20)
|
||||
#
|
||||
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
|
||||
void conftest_drm_file_get_master(void) {
|
||||
drm_file_get_master();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_FILE_GET_MASTER_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_connector_lookup)
|
||||
#
|
||||
# Determine if function drm_connector_lookup() is present.
|
||||
@ -3819,6 +3871,40 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_TEGRA_BPMP_SEND_RECEIVE" "" "functions"
|
||||
;;
|
||||
|
||||
cmd_uphy_display_port_init)
|
||||
#
|
||||
# Determine if CMD_UPHY_DISPLAY_PORT_INIT enum present in bpmp-abi header
|
||||
# This enum is used only in Tegra down-stream kernel.
|
||||
#
|
||||
CODE="
|
||||
#include <stdint.h>
|
||||
#include <soc/tegra/bpmp-abi.h>
|
||||
|
||||
int conftest_cmd_uphy_display_port_init(void) {
|
||||
return CMD_UPHY_DISPLAY_PORT_INIT;
|
||||
}
|
||||
"
|
||||
compile_check_conftest "$CODE" "NV_CMD_UPHY_DISPLAY_PORT_INIT_PRESENT" "" "generic"
|
||||
|
||||
;;
|
||||
|
||||
cmd_uphy_display_port_off)
|
||||
#
|
||||
# Determine if CMD_UPHY_DISPLAY_PORT_OFF enum present in bpmp-abi header
|
||||
# This enum is used only in Tegra down-stream kernel.
|
||||
#
|
||||
CODE="
|
||||
#include <stdint.h>
|
||||
#include <soc/tegra/bpmp-abi.h>
|
||||
|
||||
int conftest_cmd_uphy_display_port_off(void) {
|
||||
return CMD_UPHY_DISPLAY_PORT_OFF;
|
||||
}
|
||||
"
|
||||
compile_check_conftest "$CODE" "NV_CMD_UPHY_DISPLAY_PORT_OFF_PRESENT" "" "generic"
|
||||
|
||||
;;
|
||||
|
||||
drm_alpha_blending_available)
|
||||
#
|
||||
# Determine if the DRM subsystem supports alpha blending
|
||||
@ -4744,6 +4830,21 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_REMOVE_MEMORY_HAS_NID_ARG" "" "types"
|
||||
;;
|
||||
|
||||
offline_and_remove_memory)
|
||||
#
|
||||
# Determine if the offline_and_remove_memory function is present.
|
||||
#
|
||||
# Added by commit 08b3acd7a68fc179 ("mm/memory_hotplug: Introduce
|
||||
# offline_and_remove_memory()") in v5.8-rc1 (2020-06-05)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/memory_hotplug.h>
|
||||
void conftest_offline_and_remove_memory() {
|
||||
offline_and_remove_memory();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
device_property_read_u64)
|
||||
#
|
||||
@ -4825,6 +4926,38 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
of_property_read_variable_u32_array)
|
||||
#
|
||||
# Determine if of_property_read_variable_u32_array is present
|
||||
#
|
||||
# Added by commit 1df09bcof (" Move OF property and graph API from
|
||||
# base.c to property.c"
|
||||
#
|
||||
# Test if linux/of.h header file inclusion is successful or not,
|
||||
# depending on that, check for of_property_read_variable_u32_array
|
||||
# presence
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/of.h>
|
||||
" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
CODE="
|
||||
#include <linux/of.h>
|
||||
void conftest_of_property_read_variable_u32_array() {
|
||||
of_property_read_variable_u32_array();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OF_PROPERTY_READ_VARIABLE_U32_ARRAY_PRESENT" "" "functions"
|
||||
else
|
||||
echo "#undef NV_OF_PROPERTY_READ_VARIABLE_U32_ARRAY_PRESENT" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
devm_of_platform_populate)
|
||||
#
|
||||
# Determine if devm_of_platform_populate() function is present
|
||||
@ -5168,6 +5301,22 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
migrate_device_range)
|
||||
#
|
||||
# Determine if the migrate_device_range() function is present
|
||||
#
|
||||
# migrate_device_range() function was added by commit
|
||||
# e778406b40dbb ("mm/migrate_device.c: add migrate_device_range()")
|
||||
# in v6.1 (2022-09-28).
|
||||
CODE="
|
||||
#include <linux/migrate.h>
|
||||
int conftest_migrate_device_range(void) {
|
||||
migrate_device_range();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MIGRATE_DEVICE_RANGE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
ioasid_get)
|
||||
#
|
||||
# Determine if ioasid_get() function is present
|
||||
@ -5186,6 +5335,25 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_IOASID_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
mm_pasid_set)
|
||||
#
|
||||
# Determine if mm_pasid_set() function is present
|
||||
#
|
||||
# mm_pasid_set() function was added by commit
|
||||
# 701fac40384f07197b106136012804c3cae0b3de (iommu/sva: Assign a
|
||||
# PASID to mm on PASID allocation and free it on mm exit) in v5.18.
|
||||
# (2022-02-15).
|
||||
CODE="
|
||||
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
|
||||
#include <linux/sched/mm.h>
|
||||
#endif
|
||||
void conftest_mm_pasid_set(void) {
|
||||
mm_pasid_set();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MM_PASID_SET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_crtc_state_has_no_vblank)
|
||||
#
|
||||
# Determine if the 'drm_crtc_state' structure has 'no_vblank'.
|
||||
@ -5453,6 +5621,22 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE" "" "functions"
|
||||
;;
|
||||
|
||||
pci_rebar_get_possible_sizes)
|
||||
#
|
||||
# Determine if the pci_rebar_get_possible_sizes() function is present.
|
||||
#
|
||||
# Added by commit 8fbdbb66f8c10 ("PCI: Add resizable BAR infrastructure
|
||||
# ") in v5.12
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_pci_rebar_get_possible_sizes(void) {
|
||||
pci_rebar_get_possible_sizes();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PCI_REBAR_GET_POSSIBLE_SIZES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_connector_has_override_edid)
|
||||
#
|
||||
# Determine if 'struct drm_connector' has an 'override_edid' member.
|
||||
@ -5475,6 +5659,27 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HAS_OVERRIDE_EDID" "" "types"
|
||||
;;
|
||||
|
||||
iommu_sva_bind_device_has_drvdata_arg)
|
||||
#
|
||||
# Check if iommu_sva_bind_device() has drvdata parameter.
|
||||
#
|
||||
# Removed by commit be51b1d6bbff48c7d1943a8ff1e5a55777807f6e
|
||||
# ("iommu/sva: Refactoring iommu_sva_bind/unbind_device()")
|
||||
# in v6.2 (2022-10-31)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/device.h>
|
||||
void conftest_iommu_sva_bind_device_has_drvdata_arg(struct device *dev,
|
||||
struct mm_struct *mm,
|
||||
void *drvdata) {
|
||||
(void) iommu_sva_bind_device(dev, mm, drvdata);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_IOMMU_SVA_BIND_DEVICE_HAS_DRVDATA_ARG" "" "types"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit.
|
||||
#
|
||||
@ -5743,10 +5948,6 @@ case "$5" in
|
||||
|
||||
for i in $*; do compile_test $i; done
|
||||
|
||||
for file in conftest*.d; do
|
||||
rm -f $file > /dev/null 2>&1
|
||||
done
|
||||
|
||||
exit 0
|
||||
;;
|
||||
|
||||
@ -5882,10 +6083,6 @@ case "$5" in
|
||||
|
||||
test_header_presence "${7}"
|
||||
|
||||
for file in conftest*.d; do
|
||||
rm -f $file > /dev/null 2>&1
|
||||
done
|
||||
|
||||
exit $?
|
||||
;;
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-utils.h"
|
||||
#include "nvidia-drm-encoder.h"
|
||||
|
||||
@ -207,6 +208,11 @@ done:
|
||||
|
||||
nv_drm_free(pDetectParams);
|
||||
|
||||
if (status == connector_status_disconnected &&
|
||||
nv_connector->modeset_permission_filep) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -372,6 +378,8 @@ nv_drm_connector_new(struct drm_device *dev,
|
||||
nv_connector->physicalIndex = physicalIndex;
|
||||
nv_connector->type = type;
|
||||
nv_connector->internal = internal;
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
|
||||
strcpy(nv_connector->dpAddress, dpAddress);
|
||||
|
||||
@ -474,4 +482,26 @@ done:
|
||||
return connector;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revoke the permissions on this connector.
|
||||
*/
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector* nv_connector)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
bool ret = true;
|
||||
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -51,6 +51,20 @@ struct nv_drm_connector {
|
||||
|
||||
atomic_t connection_status_dirty;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
/**
|
||||
* @modeset_permission_crtc:
|
||||
*
|
||||
* The crtc using this connector with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct nv_drm_crtc *modeset_permission_crtc;
|
||||
|
||||
struct drm_connector base;
|
||||
};
|
||||
|
||||
@ -84,6 +98,9 @@ nv_drm_get_connector(struct drm_device *dev,
|
||||
NvBool internal,
|
||||
char dpAddress[NVKMS_DP_ADDRESS_STRING_LENGTH]);
|
||||
|
||||
bool nv_drm_connector_revoke_permissions(struct drm_device *dev,
|
||||
struct nv_drm_connector *nv_connector);
|
||||
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#endif /* __NVIDIA_DRM_CONNECTOR_H__ */
|
||||
|
@ -1181,6 +1181,7 @@ static struct drm_crtc *__nv_drm_crtc_create(struct nv_drm_device *nv_dev,
|
||||
nv_crtc->head = head;
|
||||
INIT_LIST_HEAD(&nv_crtc->flip_list);
|
||||
spin_lock_init(&nv_crtc->flip_list_lock);
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
|
||||
ret = drm_crtc_init_with_planes(nv_dev->dev,
|
||||
&nv_crtc->base,
|
||||
@ -1329,7 +1330,7 @@ int nv_drm_get_crtc_crc32_v2_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
@ -1357,7 +1358,7 @@ int nv_drm_get_crtc_crc32_ioctl(struct drm_device *dev,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
crtc = nv_drm_crtc_find(dev, params->crtc_id);
|
||||
crtc = nv_drm_crtc_find(dev, filep, params->crtc_id);
|
||||
if (!crtc) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
@ -35,38 +35,9 @@
|
||||
|
||||
#include <drm/drm_crtc.h>
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#include "nvtypes.h"
|
||||
#include "nvkms-kapi.h"
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_MODE_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
struct nv_drm_crtc {
|
||||
NvU32 head;
|
||||
|
||||
@ -85,6 +56,13 @@ struct nv_drm_crtc {
|
||||
*/
|
||||
spinlock_t flip_list_lock;
|
||||
|
||||
/**
|
||||
* @modeset_permission_filep:
|
||||
*
|
||||
* The filep using this crtc with DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS.
|
||||
*/
|
||||
struct drm_file *modeset_permission_filep;
|
||||
|
||||
struct drm_crtc base;
|
||||
};
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "nvidia-drm-connector.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
@ -706,6 +706,16 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_dmabuf_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
/* check the pDevice since this only gets set if modeset = 1
|
||||
* which is a requirement for the dma_buf extension to work
|
||||
*/
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
return nv_dev->pDevice ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static
|
||||
int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
@ -735,6 +745,455 @@ int nv_drm_get_client_capability_ioctl(struct drm_device *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
static bool nv_drm_connector_is_dpy_id(struct drm_connector *connector,
|
||||
NvU32 dpyId)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
return nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->hDisplay == dpyId;
|
||||
}
|
||||
|
||||
static int nv_drm_get_dpy_id_for_connector_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params *params = data;
|
||||
// Importantly, drm_connector_lookup (with filep) will only return the
|
||||
// connector if we are master, a lessee with the connector, or not master at
|
||||
// all. It will return NULL if we are a lessee with other connectors.
|
||||
struct drm_connector *connector =
|
||||
nv_drm_connector_lookup(dev, filep, params->connectorId);
|
||||
struct nv_drm_connector *nv_connector;
|
||||
int ret = 0;
|
||||
|
||||
if (!connector) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nv_connector = to_nv_connector(connector);
|
||||
if (!nv_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nv_connector->nv_detected_encoder) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
params->dpyId = nv_connector->nv_detected_encoder->hDisplay;
|
||||
|
||||
done:
|
||||
nv_drm_connector_put(connector);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_get_connector_id_for_dpy_id_ioctl(struct drm_device *dev,
|
||||
void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params *params = data;
|
||||
struct drm_connector *connector;
|
||||
int ret = -EINVAL;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
|
||||
/* Lookup for existing connector with same dpyId */
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
params->connectorId = connector->base.id;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static NvU32 nv_drm_get_head_bit_from_connector(struct drm_connector *connector)
|
||||
{
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
|
||||
if (connector->state && connector->state->crtc) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(connector->state->crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
} else if (nv_connector->nv_detected_encoder &&
|
||||
nv_connector->nv_detected_encoder->base.crtc) {
|
||||
struct nv_drm_crtc *nv_crtc =
|
||||
to_nv_crtc(nv_connector->nv_detected_encoder->base.crtc);
|
||||
return NVBIT(nv_crtc->head);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nv_drm_grant_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_grant_permissions_params *params = data;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct nv_drm_connector *target_nv_connector = NULL;
|
||||
struct nv_drm_crtc *target_nv_crtc = NULL;
|
||||
struct drm_connector *connector, *target_connector = NULL;
|
||||
struct drm_crtc *crtc;
|
||||
NvU32 head = 0, freeHeadBits, targetHeadBit, possible_crtcs;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/* Get the connector for the dpyId. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
if (nv_drm_connector_is_dpy_id(connector, params->dpyId)) {
|
||||
target_connector =
|
||||
nv_drm_connector_lookup(dev, filep, connector->base.id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
// Importantly, drm_connector_lookup/drm_crtc_find (with filep) will only
|
||||
// return the object if we are master, a lessee with the object, or not
|
||||
// master at all. It will return NULL if we are a lessee with other objects.
|
||||
if (!target_connector) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
target_nv_connector = to_nv_connector(target_connector);
|
||||
possible_crtcs =
|
||||
target_nv_connector->nv_detected_encoder->base.possible_crtcs;
|
||||
|
||||
/* Target connector must not be previously granted. */
|
||||
if (target_nv_connector->modeset_permission_filep) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Add all heads that are owned and not already granted. */
|
||||
freeHeadBits = 0;
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_drm_crtc_find(dev, filep, crtc->base.id) &&
|
||||
!nv_crtc->modeset_permission_filep &&
|
||||
(drm_crtc_mask(crtc) & possible_crtcs)) {
|
||||
freeHeadBits |= NVBIT(nv_crtc->head);
|
||||
}
|
||||
}
|
||||
|
||||
targetHeadBit = nv_drm_get_head_bit_from_connector(target_connector);
|
||||
if (targetHeadBit & freeHeadBits) {
|
||||
/* If a crtc is already being used by this connector, use it. */
|
||||
freeHeadBits = targetHeadBit;
|
||||
} else {
|
||||
/* Otherwise, remove heads that are in use by other connectors. */
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
freeHeadBits &= ~nv_drm_get_head_bit_from_connector(connector);
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Fail if no heads are available. */
|
||||
if (!freeHeadBits) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop through the crtc again and find a matching head.
|
||||
* Record the filep that is using the crtc and the connector.
|
||||
*/
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (freeHeadBits & NVBIT(nv_crtc->head)) {
|
||||
target_nv_crtc = nv_crtc;
|
||||
head = nv_crtc->head;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvKms->grantPermissions(params->fd, nv_dev->pDevice, head,
|
||||
params->dpyId)) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
target_nv_connector->modeset_permission_crtc = target_nv_crtc;
|
||||
target_nv_connector->modeset_permission_filep = filep;
|
||||
target_nv_crtc->modeset_permission_filep = filep;
|
||||
|
||||
done:
|
||||
if (target_connector) {
|
||||
nv_drm_connector_put(target_connector);
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool nv_drm_revoke_connector(struct nv_drm_device *nv_dev,
|
||||
struct nv_drm_connector *nv_connector)
|
||||
{
|
||||
bool ret = true;
|
||||
if (nv_connector->modeset_permission_crtc) {
|
||||
if (nv_connector->nv_detected_encoder) {
|
||||
ret = nvKms->revokePermissions(
|
||||
nv_dev->pDevice, nv_connector->modeset_permission_crtc->head,
|
||||
nv_connector->nv_detected_encoder->hDisplay);
|
||||
}
|
||||
nv_connector->modeset_permission_crtc->modeset_permission_filep = NULL;
|
||||
nv_connector->modeset_permission_crtc = NULL;
|
||||
}
|
||||
nv_connector->modeset_permission_filep = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission(struct drm_device *dev,
|
||||
struct drm_file *filep, NvU32 dpyId)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
int ret = 0;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If dpyId is set, only revoke those specific resources. Otherwise,
|
||||
* it is from closing the file so revoke all resources for that filep.
|
||||
*/
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep == filep &&
|
||||
(!dpyId || nv_drm_connector_is_dpy_id(connector, dpyId))) {
|
||||
if (!nv_drm_connector_revoke_permissions(dev, nv_connector)) {
|
||||
ret = -EINVAL;
|
||||
// Continue trying to revoke as much as possible.
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep == filep && !dpyId) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int nv_drm_revoke_permission_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct drm_nvidia_revoke_permissions_params *params = data;
|
||||
if (!params->dpyId) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return nv_drm_revoke_permission(dev, filep, params->dpyId);
|
||||
}
|
||||
|
||||
static void nv_drm_postclose(struct drm_device *dev, struct drm_file *filep)
|
||||
{
|
||||
/*
|
||||
* Some systems like android can reach here without initializing the
|
||||
* device, so check for that.
|
||||
*/
|
||||
if (dev->mode_config.num_crtc > 0 &&
|
||||
dev->mode_config.crtc_list.next != NULL &&
|
||||
dev->mode_config.crtc_list.prev != NULL &&
|
||||
dev->mode_config.num_connector > 0 &&
|
||||
dev->mode_config.connector_list.next != NULL &&
|
||||
dev->mode_config.connector_list.prev != NULL) {
|
||||
nv_drm_revoke_permission(dev, filep, 0);
|
||||
}
|
||||
}
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
|
||||
#if defined(NV_DRM_MASTER_HAS_LEASES)
|
||||
static struct drm_master *nv_drm_find_lessee(struct drm_master *master,
|
||||
int lessee_id)
|
||||
{
|
||||
int object;
|
||||
void *entry;
|
||||
|
||||
while (master->lessor != NULL) {
|
||||
master = master->lessor;
|
||||
}
|
||||
|
||||
idr_for_each_entry(&master->lessee_idr, entry, object)
|
||||
{
|
||||
if (object == lessee_id) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void nv_drm_get_revoked_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, unsigned int cmd,
|
||||
unsigned long arg, int **objects,
|
||||
int *objects_count)
|
||||
{
|
||||
unsigned int ioc_size;
|
||||
struct drm_mode_revoke_lease revoke_lease;
|
||||
struct drm_master *lessor, *lessee;
|
||||
void *entry;
|
||||
int *objs;
|
||||
int obj, obj_count, obj_i;
|
||||
|
||||
ioc_size = _IOC_SIZE(cmd);
|
||||
if (ioc_size > sizeof(revoke_lease)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (copy_from_user(&revoke_lease, (void __user *)arg, ioc_size) != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lessor = nv_drm_file_get_master(filep);
|
||||
if (lessor == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->mode_config.idr_mutex);
|
||||
lessee = nv_drm_find_lessee(lessor, revoke_lease.lessee_id);
|
||||
|
||||
if (lessee == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_count = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
++obj_count;
|
||||
}
|
||||
if (obj_count == 0) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
objs = nv_drm_calloc(obj_count, sizeof(int));
|
||||
if (objs == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
obj_i = 0;
|
||||
idr_for_each_entry(&lessee->leases, entry, obj) {
|
||||
objs[obj_i++] = obj;
|
||||
}
|
||||
*objects = objs;
|
||||
*objects_count = obj_count;
|
||||
|
||||
done:
|
||||
mutex_unlock(&dev->mode_config.idr_mutex);
|
||||
drm_master_put(&lessor);
|
||||
}
|
||||
|
||||
static bool nv_drm_is_in_objects(int object, int *objects, int objects_count)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < objects_count; ++i) {
|
||||
if (objects[i] == object) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void nv_drm_finish_revoking_objects(struct drm_device *dev,
|
||||
struct drm_file *filep, int *objects,
|
||||
int objects_count)
|
||||
{
|
||||
struct drm_connector *connector;
|
||||
struct drm_crtc *crtc;
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
struct drm_connector_list_iter conn_iter;
|
||||
#endif
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
int ret = 0;
|
||||
struct drm_modeset_acquire_ctx ctx;
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE,
|
||||
ret);
|
||||
#else
|
||||
mutex_lock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_begin(dev, &conn_iter);
|
||||
#endif
|
||||
nv_drm_for_each_connector(connector, &conn_iter, dev) {
|
||||
struct nv_drm_connector *nv_connector = to_nv_connector(connector);
|
||||
if (nv_connector->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(connector->base.id, objects, objects_count)) {
|
||||
nv_drm_connector_revoke_permissions(dev, nv_connector);
|
||||
}
|
||||
}
|
||||
#if defined(NV_DRM_CONNECTOR_LIST_ITER_PRESENT)
|
||||
nv_drm_connector_list_iter_end(&conn_iter);
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_crtc(crtc, dev) {
|
||||
struct nv_drm_crtc *nv_crtc = to_nv_crtc(crtc);
|
||||
if (nv_crtc->modeset_permission_filep &&
|
||||
nv_drm_is_in_objects(crtc->base.id, objects, objects_count)) {
|
||||
nv_crtc->modeset_permission_filep = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#if NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT == 3
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
#else
|
||||
mutex_unlock(&dev->mode_config.mutex);
|
||||
#endif
|
||||
}
|
||||
#endif /* NV_DRM_MASTER_HAS_LEASES */
|
||||
|
||||
#if defined(NV_DRM_BUS_PRESENT)

#if defined(NV_DRM_BUS_HAS_GET_IRQ)
@@ -766,12 +1225,50 @@ static struct drm_bus nv_drm_bus = {

#endif /* NV_DRM_BUS_PRESENT */

/*
 * Wrapper around drm_ioctl to hook into the upstream ioctl path.
 *
 * Currently used to add additional handling to REVOKE_LEASE.
 */
static long nv_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    long retcode;

#if defined(NV_DRM_MASTER_HAS_LEASES)
    struct drm_file *file_priv = filp->private_data;
    struct drm_device *dev = file_priv->minor->dev;
    int *objects = NULL;
    int objects_count = 0;

    if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE) {
        // Save the revoked objects before revoking.
        nv_drm_get_revoked_objects(dev, file_priv, cmd, arg, &objects,
                                   &objects_count);
    }
#endif

    retcode = drm_ioctl(filp, cmd, arg);

#if defined(NV_DRM_MASTER_HAS_LEASES)
    if (cmd == DRM_IOCTL_MODE_REVOKE_LEASE && objects) {
        if (retcode == 0) {
            // If revoking was successful, finish revoking the objects.
            nv_drm_finish_revoking_objects(dev, file_priv, objects,
                                           objects_count);
        }
        nv_drm_free(objects);
    }
#endif

    return retcode;
}

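The REVOKE_LEASE handling above is driven entirely by the standard DRM lease UAPI; nothing NVIDIA-specific is exposed to userspace. For context only, a minimal userspace sketch of the call this wrapper intercepts might look as follows. The device path, lessee id, and header location are illustrative assumptions, and the caller must be DRM master of the lessor device:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/drm.h>  /* DRM_IOCTL_MODE_REVOKE_LEASE, struct drm_mode_revoke_lease */

int main(void)
{
    /* Illustrative values: the lessor device node and a lessee id obtained
     * earlier from DRM_IOCTL_MODE_CREATE_LEASE / DRM_IOCTL_MODE_LIST_LESSEES. */
    int fd = open("/dev/dri/card0", O_RDWR);
    struct drm_mode_revoke_lease revoke = { .lessee_id = 1 };

    if (fd < 0)
        return 1;

    /* The kernel routes this through nv_drm_ioctl() above, which snapshots
     * the leased objects first and, if drm_ioctl() succeeds, clears the
     * modeset permissions that were granted along with the lease. */
    if (ioctl(fd, DRM_IOCTL_MODE_REVOKE_LEASE, &revoke) != 0)
        perror("DRM_IOCTL_MODE_REVOKE_LEASE");

    close(fd);
    return 0;
}
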
static const struct file_operations nv_drm_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
|
||||
.open = drm_open,
|
||||
.release = drm_release,
|
||||
.unlocked_ioctl = drm_ioctl,
|
||||
.unlocked_ioctl = nv_drm_ioctl,
|
||||
#if defined(CONFIG_COMPAT)
|
||||
.compat_ioctl = drm_compat_ioctl,
|
||||
#endif
|
||||
@ -807,11 +1304,11 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_SUPPORTED,
|
||||
nv_drm_fence_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_fence_context_create_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_PRIME_FENCE_CONTEXT_CREATE,
|
||||
nv_drm_prime_fence_context_create_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_FENCE_ATTACH,
|
||||
nv_drm_gem_fence_attach_ioctl,
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_PRIME_FENCE_ATTACH,
|
||||
nv_drm_gem_prime_fence_attach_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
#endif
|
||||
|
||||
@ -837,6 +1334,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GEM_IDENTIFY_OBJECT,
|
||||
nv_drm_gem_identify_object_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_DMABUF_SUPPORTED,
|
||||
nv_drm_dmabuf_supported_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID,
|
||||
nv_drm_get_dpy_id_for_connector_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID,
|
||||
nv_drm_get_connector_id_for_dpy_id_ioctl,
|
||||
DRM_RENDER_ALLOW|DRM_UNLOCKED),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_GRANT_PERMISSIONS,
|
||||
nv_drm_grant_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
DRM_IOCTL_DEF_DRV(NVIDIA_REVOKE_PERMISSIONS,
|
||||
nv_drm_revoke_permission_ioctl,
|
||||
DRM_UNLOCKED|DRM_MASTER),
|
||||
#endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
|
||||
};
|
||||
|
||||
@ -879,6 +1391,9 @@ static struct drm_driver nv_drm_driver = {
|
||||
|
||||
.load = nv_drm_load,
|
||||
.unload = nv_drm_unload,
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
.postclose = nv_drm_postclose,
|
||||
#endif
|
||||
|
||||
.fops = &nv_drm_fops,
|
||||
|
||||
|
@ -31,17 +31,28 @@
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-dma-resv-helper.h"
|
||||
|
||||
#if defined(NV_DRM_FENCE_AVAILABLE)
|
||||
|
||||
#include "nvidia-dma-fence-helper.h"
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
struct nv_drm_device *nv_dev;
|
||||
struct nv_drm_fence_context;
|
||||
|
||||
struct nv_drm_fence_context_ops {
|
||||
void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
|
||||
};
|
||||
|
||||
struct nv_drm_fence_context {
|
||||
const struct nv_drm_fence_context_ops *ops;
|
||||
|
||||
struct nv_drm_device *nv_dev;
|
||||
uint32_t context;
|
||||
};
|
||||
|
||||
struct nv_drm_prime_fence_context {
|
||||
struct nv_drm_fence_context base;
|
||||
|
||||
NvU64 fenceSemIndex; /* Index into semaphore surface */
|
||||
|
||||
@ -53,10 +64,10 @@ struct nv_drm_fence_context {
|
||||
spinlock_t lock;
|
||||
|
||||
/*
|
||||
* Software signaling structures. __nv_drm_fence_context_new()
|
||||
* allocates channel event and __nv_drm_fence_context_destroy() frees it.
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
* Software signaling structures. __nv_drm_prime_fence_context_new()
|
||||
* allocates channel event and __nv_drm_prime_fence_context_destroy() frees
|
||||
* it. There are no simultaneous read/write access to 'cb', therefore it
|
||||
* does not require spin-lock protection.
|
||||
*/
|
||||
struct NvKmsKapiChannelEvent *cb;
|
||||
|
||||
@ -79,7 +90,7 @@ struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
|
||||
}
|
||||
|
||||
static const char*
|
||||
nv_drm_gem_prime_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
|
||||
{
|
||||
return "NVIDIA";
|
||||
}
|
||||
@ -122,7 +133,7 @@ nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
|
||||
}
|
||||
|
||||
static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
|
||||
.get_driver_name = nv_drm_gem_prime_fence_op_get_driver_name,
|
||||
.get_driver_name = nv_drm_gem_fence_op_get_driver_name,
|
||||
.get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
|
||||
.enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
|
||||
.release = nv_drm_gem_prime_fence_op_release,
|
||||
@ -138,7 +149,7 @@ __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
|
||||
}
|
||||
|
||||
static void nv_drm_gem_prime_force_fence_signal(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
struct nv_drm_prime_fence_context *nv_fence_context)
|
||||
{
|
||||
WARN_ON(!spin_is_locked(&nv_fence_context->lock));
|
||||
|
||||
@ -158,7 +169,7 @@ static void nv_drm_gem_prime_fence_event
|
||||
NvU32 dataU32
|
||||
)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context = dataPtr;
|
||||
struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
@ -187,11 +198,53 @@ static void nv_drm_gem_prime_fence_event
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_fence_context_create_params *p)
|
||||
static inline struct nv_drm_prime_fence_context*
|
||||
to_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
|
||||
return (struct nv_drm_prime_fence_context *)nv_fence_context;
|
||||
}
|
||||
|
||||
static void __nv_drm_prime_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_fence_context *nv_fence_context;
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
to_prime_fence_context(nv_fence_context);
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_prime_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_prime_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
|
||||
.destroy = __nv_drm_prime_fence_context_destroy,
|
||||
};
|
||||
|
||||
static inline struct nv_drm_prime_fence_context *
|
||||
__nv_drm_prime_fence_context_new(
|
||||
struct nv_drm_device *nv_dev,
|
||||
struct drm_nvidia_prime_fence_context_create_params *p)
|
||||
{
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context;
|
||||
struct NvKmsKapiMemory *pSemSurface;
|
||||
NvU32 *pLinearAddress;
|
||||
|
||||
@ -225,9 +278,9 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* event for it.
|
||||
*/
|
||||
|
||||
if ((nv_fence_context = nv_drm_calloc(
|
||||
if ((nv_prime_fence_context = nv_drm_calloc(
|
||||
1,
|
||||
sizeof(*nv_fence_context))) == NULL) {
|
||||
sizeof(*nv_prime_fence_context))) == NULL) {
|
||||
goto failed_alloc_fence_context;
|
||||
}
|
||||
|
||||
@ -236,17 +289,18 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* to check a return value.
|
||||
*/
|
||||
|
||||
*nv_fence_context = (struct nv_drm_fence_context) {
|
||||
.nv_dev = nv_dev,
|
||||
.context = nv_dma_fence_context_alloc(1),
|
||||
*nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
|
||||
.base.ops = &nv_drm_prime_fence_context_ops,
|
||||
.base.nv_dev = nv_dev,
|
||||
.base.context = nv_dma_fence_context_alloc(1),
|
||||
.pSemSurface = pSemSurface,
|
||||
.pLinearAddress = pLinearAddress,
|
||||
.fenceSemIndex = p->index,
|
||||
};
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence_context->pending);
|
||||
INIT_LIST_HEAD(&nv_prime_fence_context->pending);
|
||||
|
||||
spin_lock_init(&nv_fence_context->lock);
|
||||
spin_lock_init(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* Except 'cb', the fence context should be completely initialized
|
||||
@ -256,22 +310,22 @@ static inline struct nv_drm_fence_context *__nv_drm_fence_context_new(
|
||||
* There are no simultaneous read/write access to 'cb', therefore it does
|
||||
* not require spin-lock protection.
|
||||
*/
|
||||
nv_fence_context->cb =
|
||||
nv_prime_fence_context->cb =
|
||||
nvKms->allocateChannelEvent(nv_dev->pDevice,
|
||||
nv_drm_gem_prime_fence_event,
|
||||
nv_fence_context,
|
||||
nv_prime_fence_context,
|
||||
p->event_nvkms_params_ptr,
|
||||
p->event_nvkms_params_size);
|
||||
if (!nv_fence_context->cb) {
|
||||
if (!nv_prime_fence_context->cb) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev,
|
||||
"Failed to allocate fence signaling event");
|
||||
goto failed_to_allocate_channel_event;
|
||||
}
|
||||
|
||||
return nv_fence_context;
|
||||
return nv_prime_fence_context;
|
||||
|
||||
failed_to_allocate_channel_event:
|
||||
nv_drm_free(nv_fence_context);
|
||||
nv_drm_free(nv_prime_fence_context);
|
||||
|
||||
failed_alloc_fence_context:
|
||||
|
||||
@ -287,38 +341,8 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __nv_drm_fence_context_destroy(
|
||||
struct nv_drm_fence_context *nv_fence_context)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
|
||||
|
||||
/*
|
||||
* Free channel event before destroying the fence context, otherwise event
|
||||
* callback continue to get called.
|
||||
*/
|
||||
nvKms->freeChannelEvent(nv_dev->pDevice, nv_fence_context->cb);
|
||||
|
||||
/* Force signal all pending fences and empty pending list */
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
|
||||
/* Free nvkms resources */
|
||||
|
||||
nvKms->unmapMemory(nv_dev->pDevice,
|
||||
nv_fence_context->pSemSurface,
|
||||
NVKMS_KAPI_MAPPING_TYPE_KERNEL,
|
||||
(void *) nv_fence_context->pLinearAddress);
|
||||
|
||||
nvKms->freeMemory(nv_dev->pDevice, nv_fence_context->pSemSurface);
|
||||
|
||||
nv_drm_free(nv_fence_context);
|
||||
}
|
||||
|
||||
static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context,
|
||||
unsigned int seqno)
|
||||
{
|
||||
struct nv_drm_prime_fence *nv_fence;
|
||||
@ -329,14 +353,14 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&nv_fence_context->lock);
|
||||
spin_lock(&nv_prime_fence_context->lock);
|
||||
|
||||
/*
|
||||
* If seqno wrapped, force signal fences to make sure none of them
|
||||
* get stuck.
|
||||
*/
|
||||
if (seqno < nv_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_fence_context);
|
||||
if (seqno < nv_prime_fence_context->last_seqno) {
|
||||
nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&nv_fence->list_entry);
|
||||
@ -344,17 +368,17 @@ static nv_dma_fence_t *__nv_drm_fence_context_create_fence(
|
||||
spin_lock_init(&nv_fence->lock);
|
||||
|
||||
nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
|
||||
&nv_fence->lock, nv_fence_context->context,
|
||||
&nv_fence->lock, nv_prime_fence_context->base.context,
|
||||
seqno);
|
||||
|
||||
/* The context maintains a reference to any pending fences. */
|
||||
nv_dma_fence_get(&nv_fence->base);
|
||||
|
||||
list_add_tail(&nv_fence->list_entry, &nv_fence_context->pending);
|
||||
list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
|
||||
|
||||
nv_fence_context->last_seqno = seqno;
|
||||
nv_prime_fence_context->last_seqno = seqno;
|
||||
|
||||
spin_unlock(&nv_fence_context->lock);
|
||||
spin_unlock(&nv_prime_fence_context->lock);
|
||||
|
||||
out:
|
||||
return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
|
||||
@ -388,12 +412,15 @@ static inline struct nv_drm_gem_fence_context *to_gem_fence_context(
|
||||
* because tear down sequence calls to flush all existing
|
||||
* worker thread.
|
||||
*/
|
||||
static void __nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
static void
|
||||
__nv_drm_gem_fence_context_free(struct nv_drm_gem_object *nv_gem)
|
||||
{
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context =
|
||||
to_gem_fence_context(nv_gem);
|
||||
struct nv_drm_fence_context *nv_fence_context =
|
||||
nv_gem_fence_context->nv_fence_context;
|
||||
|
||||
__nv_drm_fence_context_destroy(nv_gem_fence_context->nv_fence_context);
|
||||
nv_fence_context->ops->destroy(nv_fence_context);
|
||||
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
}
|
||||
@ -403,7 +430,8 @@ const struct nv_drm_gem_object_funcs nv_gem_fence_context_ops = {
|
||||
};
|
||||
|
||||
static inline
|
||||
struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
struct nv_drm_gem_fence_context *
|
||||
__nv_drm_gem_object_fence_context_lookup(
|
||||
struct drm_device *dev,
|
||||
struct drm_file *filp,
|
||||
u32 handle)
|
||||
@ -419,11 +447,13 @@ struct nv_drm_gem_fence_context *__nv_drm_gem_object_fence_context_lookup(
|
||||
return to_gem_fence_context(nv_gem);
|
||||
}
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
static int
|
||||
__nv_drm_gem_fence_context_create(struct drm_device *dev,
|
||||
struct nv_drm_fence_context *nv_fence_context,
|
||||
u32 *handle,
|
||||
struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_fence_context_create_params *p = data;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context = NULL;
|
||||
|
||||
if ((nv_gem_fence_context = nv_drm_calloc(
|
||||
@ -432,10 +462,7 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((nv_gem_fence_context->nv_fence_context =
|
||||
__nv_drm_fence_context_new(nv_dev, p)) == NULL) {
|
||||
goto fence_context_new_failed;
|
||||
}
|
||||
nv_gem_fence_context->nv_fence_context = nv_fence_context;
|
||||
|
||||
nv_drm_gem_object_init(nv_dev,
|
||||
&nv_gem_fence_context->base,
|
||||
@ -445,21 +472,45 @@ int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
|
||||
return nv_drm_gem_handle_create_drop_reference(filep,
|
||||
&nv_gem_fence_context->base,
|
||||
&p->handle);
|
||||
|
||||
fence_context_new_failed:
|
||||
nv_drm_free(nv_gem_fence_context);
|
||||
handle);
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_prime_fence_context_create_params *p = data;
|
||||
struct nv_drm_prime_fence_context *nv_prime_fence_context =
|
||||
__nv_drm_prime_fence_context_new(nv_dev, p);
|
||||
int err;
|
||||
|
||||
if (!nv_prime_fence_context) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = __nv_drm_gem_fence_context_create(dev,
|
||||
&nv_prime_fence_context->base,
|
||||
&p->handle,
|
||||
filep);
|
||||
if (err) {
|
||||
__nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
done:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(dev);
|
||||
struct drm_nvidia_gem_fence_attach_params *p = data;
|
||||
struct drm_nvidia_gem_prime_fence_attach_params *p = data;
|
||||
|
||||
struct nv_drm_gem_object *nv_gem;
|
||||
struct nv_drm_gem_fence_context *nv_gem_fence_context;
|
||||
@ -490,9 +541,22 @@ int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
goto fence_context_lookup_failed;
|
||||
}
|
||||
|
||||
if (IS_ERR(fence = __nv_drm_fence_context_create_fence(
|
||||
nv_gem_fence_context->nv_fence_context,
|
||||
p->sem_thresh))) {
|
||||
if (nv_gem_fence_context->nv_fence_context->ops !=
|
||||
&nv_drm_prime_fence_context_ops) {
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
||||
nv_dev,
|
||||
"Wrong fence context type: 0x%08x",
|
||||
p->fence_context_handle);
|
||||
|
||||
goto fence_context_create_fence_failed;
|
||||
}
|
||||
|
||||
fence = __nv_drm_prime_fence_context_create_fence(
|
||||
to_prime_fence_context(nv_gem_fence_context->nv_fence_context),
|
||||
p->sem_thresh);
|
||||
|
||||
if (IS_ERR(fence)) {
|
||||
ret = PTR_ERR(fence);
|
||||
|
||||
NV_DRM_DEV_LOG_ERR(
|
@ -35,10 +35,10 @@ struct drm_device;
|
||||
int nv_drm_fence_supported_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_fence_context_create_ioctl(struct drm_device *dev,
|
||||
int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
int nv_drm_gem_fence_attach_ioctl(struct drm_device *dev,
|
||||
int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
|
||||
void *data, struct drm_file *filep);
|
||||
|
||||
#endif /* NV_DRM_FENCE_AVAILABLE */
|
@ -26,7 +26,7 @@
|
||||
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-ioctl.h"
|
||||
#include "nvidia-drm-prime-fence.h"
|
||||
#include "nvidia-drm-fence.h"
|
||||
#include "nvidia-drm-gem.h"
|
||||
#include "nvidia-drm-gem-nvkms-memory.h"
|
||||
#include "nvidia-drm-gem-user-memory.h"
|
||||
|
@ -28,6 +28,8 @@
|
||||
*/
|
||||
|
||||
#include "nvidia-drm-helper.h"
|
||||
#include "nvidia-drm-priv.h"
|
||||
#include "nvidia-drm-crtc.h"
|
||||
|
||||
#include "nvmisc.h"
|
||||
|
||||
@ -148,6 +150,18 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
goto free;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
nv_drm_for_each_plane(plane, dev) {
|
||||
plane_state = drm_atomic_get_plane_state(state, plane);
|
||||
if (IS_ERR(plane_state)) {
|
||||
ret = PTR_ERR(plane_state);
|
||||
goto free;
|
||||
}
|
||||
|
||||
plane_state->rotation = DRM_MODE_ROTATE_0;
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_drm_for_each_connector_in_state(state, conn, conn_state, i) {
|
||||
ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
|
||||
if (ret < 0)
|
||||
|
@ -35,6 +35,35 @@
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ALPHA_BLENDING_AVAILABLE) || defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_ROTATE_* , DRM_REFLECT_* */
|
||||
#include <drm/drm_blend.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/* For DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* */
|
||||
#include <uapi/drm/drm_mode.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_ROTATION_AVAILABLE)
|
||||
/*
|
||||
* 19-05-2017 c2c446ad29437bb92b157423c632286608ebd3ec has added
|
||||
* DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* to UAPI and removed
|
||||
* DRM_ROTATE_* and DRM_REFLECT_*
|
||||
*/
|
||||
#if !defined(DRM_MODE_ROTATE_0)
|
||||
#define DRM_MODE_ROTATE_0 DRM_ROTATE_0
|
||||
#define DRM_MODE_ROTATE_90 DRM_ROTATE_90
|
||||
#define DRM_MODE_ROTATE_180 DRM_ROTATE_180
|
||||
#define DRM_MODE_ROTATE_270 DRM_ROTATE_270
|
||||
#define DRM_MODE_REFLECT_X DRM_REFLECT_X
|
||||
#define DRM_MODE_REFLECT_Y DRM_REFLECT_Y
|
||||
#define DRM_MODE_ROTATE_MASK DRM_ROTATE_MASK
|
||||
#define DRM_MODE_REFLECT_MASK DRM_REFLECT_MASK
|
||||
#endif
|
||||
|
||||
#endif //NV_DRM_ROTATION_AVAILABLE
|
||||
|
||||
/*
|
||||
* drm_dev_put() is added by commit 9a96f55034e41b4e002b767e9218d55f03bdff7d
|
||||
* (2017-09-26) and drm_dev_unref() is removed by
|
||||
@ -277,11 +306,33 @@ int nv_drm_atomic_helper_disable_all(struct drm_device *dev,
|
||||
for_each_plane_in_state(__state, plane, plane_state, __i)
|
||||
#endif
|
||||
|
||||
static inline struct drm_crtc *nv_drm_crtc_find(struct drm_device *dev,
|
||||
static inline struct drm_connector *
|
||||
nv_drm_connector_lookup(struct drm_device *dev, struct drm_file *filep,
|
||||
uint32_t id)
|
||||
{
|
||||
#if !defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
return drm_connector_find(dev, id);
|
||||
#elif defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_connector_lookup(dev, filep, id);
|
||||
#else
|
||||
return drm_connector_lookup(dev, id);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void nv_drm_connector_put(struct drm_connector *connector)
|
||||
{
|
||||
#if defined(NV_DRM_CONNECTOR_PUT_PRESENT)
|
||||
drm_connector_put(connector);
|
||||
#elif defined(NV_DRM_CONNECTOR_LOOKUP_PRESENT)
|
||||
drm_connector_unreference(connector);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline struct drm_crtc *
|
||||
nv_drm_crtc_find(struct drm_device *dev, struct drm_file *filep, uint32_t id)
|
||||
{
|
||||
#if defined(NV_DRM_MODE_OBJECT_FIND_HAS_FILE_PRIV_ARG)
|
||||
return drm_crtc_find(dev, NULL /* file_priv */, id);
|
||||
return drm_crtc_find(dev, filep, id);
|
||||
#else
|
||||
return drm_crtc_find(dev, id);
|
||||
#endif
|
||||
@ -297,6 +348,30 @@ static inline struct drm_encoder *nv_drm_encoder_find(struct drm_device *dev,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_DRM_AUTH_H_PRESENT)
|
||||
#include <drm/drm_auth.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_FILE_H_PRESENT)
|
||||
#include <drm/drm_file.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* drm_file_get_master() added by commit 56f0729a510f ("drm: protect drm_master
|
||||
* pointers in drm_lease.c") in v5.15 (2021-07-20)
|
||||
*/
|
||||
static inline struct drm_master *nv_drm_file_get_master(struct drm_file *filep)
|
||||
{
|
||||
#if defined(NV_DRM_FILE_GET_MASTER_PRESENT)
|
||||
return drm_file_get_master(filep);
|
||||
#else
|
||||
if (filep->master) {
|
||||
return drm_master_get(filep->master);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* drm_connector_for_each_possible_encoder() is added by commit
|
||||
* 83aefbb887b59df0b3520965c3701e01deacfc52 which was Signed-off-by:
|
||||
|
@ -34,8 +34,8 @@
|
||||
#define DRM_NVIDIA_GEM_IMPORT_USERSPACE_MEMORY 0x02
|
||||
#define DRM_NVIDIA_GET_DEV_INFO 0x03
|
||||
#define DRM_NVIDIA_FENCE_SUPPORTED 0x04
|
||||
#define DRM_NVIDIA_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE 0x05
|
||||
#define DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH 0x06
|
||||
#define DRM_NVIDIA_GET_CLIENT_CAPABILITY 0x08
|
||||
#define DRM_NVIDIA_GEM_EXPORT_NVKMS_MEMORY 0x09
|
||||
#define DRM_NVIDIA_GEM_MAP_OFFSET 0x0a
|
||||
@ -43,6 +43,11 @@
|
||||
#define DRM_NVIDIA_GET_CRTC_CRC32_V2 0x0c
|
||||
#define DRM_NVIDIA_GEM_EXPORT_DMABUF_MEMORY 0x0d
|
||||
#define DRM_NVIDIA_GEM_IDENTIFY_OBJECT 0x0e
|
||||
#define DRM_NVIDIA_DMABUF_SUPPORTED 0x0f
|
||||
#define DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID 0x10
|
||||
#define DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID 0x11
|
||||
#define DRM_NVIDIA_GRANT_PERMISSIONS 0x12
|
||||
#define DRM_NVIDIA_REVOKE_PERMISSIONS 0x13
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_IMPORT_NVKMS_MEMORY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IMPORT_NVKMS_MEMORY), \
|
||||
@ -65,17 +70,20 @@
|
||||
#if defined(NV_LINUX)
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_SUPPORTED)
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED \
|
||||
DRM_IO(DRM_COMMAND_BASE + DRM_NVIDIA_DMABUF_SUPPORTED)
|
||||
#else
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_SUPPORTED 0
|
||||
#define DRM_IOCTL_NVIDIA_DMABUF_SUPPORTED 0
|
||||
#endif
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_FENCE_CONTEXT_CREATE), \
|
||||
struct drm_nvidia_fence_context_create_params)
|
||||
#define DRM_IOCTL_NVIDIA_PRIME_FENCE_CONTEXT_CREATE \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_PRIME_FENCE_CONTEXT_CREATE),\
|
||||
struct drm_nvidia_prime_fence_context_create_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GEM_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_fence_attach_params)
|
||||
#define DRM_IOCTL_NVIDIA_GEM_PRIME_FENCE_ATTACH \
|
||||
DRM_IOW((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_PRIME_FENCE_ATTACH), \
|
||||
struct drm_nvidia_gem_prime_fence_attach_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CLIENT_CAPABILITY \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CLIENT_CAPABILITY), \
|
||||
@ -109,6 +117,22 @@
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GEM_IDENTIFY_OBJECT), \
|
||||
struct drm_nvidia_gem_identify_object_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_DPY_ID_FOR_CONNECTOR_ID),\
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GET_CONNECTOR_ID_FOR_DPY_ID),\
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_GRANT_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_GRANT_PERMISSIONS), \
|
||||
struct drm_nvidia_grant_permissions_params)
|
||||
|
||||
#define DRM_IOCTL_NVIDIA_REVOKE_PERMISSIONS \
|
||||
DRM_IOWR((DRM_COMMAND_BASE + DRM_NVIDIA_REVOKE_PERMISSIONS), \
|
||||
struct drm_nvidia_revoke_permissions_params)
|
||||
|
||||
struct drm_nvidia_gem_import_nvkms_memory_params {
|
||||
uint64_t mem_size; /* IN */
|
||||
|
||||
@ -136,7 +160,7 @@ struct drm_nvidia_get_dev_info_params {
|
||||
uint32_t sector_layout; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_fence_context_create_params {
|
||||
struct drm_nvidia_prime_fence_context_create_params {
|
||||
uint32_t handle; /* OUT GEM handle to fence context */
|
||||
|
||||
uint32_t index; /* IN Index of semaphore to use for fencing */
|
||||
@ -151,7 +175,7 @@ struct drm_nvidia_fence_context_create_params {
|
||||
uint64_t event_nvkms_params_size; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_gem_fence_attach_params {
|
||||
struct drm_nvidia_gem_prime_fence_attach_params {
|
||||
uint32_t handle; /* IN GEM handle to attach fence to */
|
||||
uint32_t fence_context_handle; /* IN GEM handle to fence context on which fence is run on */
|
||||
uint32_t sem_thresh; /* IN Semaphore value to reach before signal */
|
||||
@ -232,4 +256,23 @@ struct drm_nvidia_gem_identify_object_params {
|
||||
drm_nvidia_gem_object_type object_type; /* OUT GEM object type */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_dpy_id_for_connector_id_params {
|
||||
uint32_t connectorId; /* IN */
|
||||
uint32_t dpyId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_get_connector_id_for_dpy_id_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
uint32_t connectorId; /* OUT */
|
||||
};
|
||||
|
||||
struct drm_nvidia_grant_permissions_params {
|
||||
int32_t fd; /* IN */
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
struct drm_nvidia_revoke_permissions_params {
|
||||
uint32_t dpyId; /* IN */
|
||||
};
|
||||
|
||||
#endif /* _UAPI_NVIDIA_DRM_IOCTL_H_ */
|
||||
|
@ -16,7 +16,7 @@ NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-connector.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-gem.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fb.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-modeset.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-prime-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-fence.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-linux.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nvidia-drm-helper.c
|
||||
NVIDIA_DRM_SOURCES += nvidia-drm/nv-pci-table.c
|
||||
@ -124,3 +124,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_has_override_edid
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_master_has_leases
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_file_get_master
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_modeset_lock_all_end
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
|
||||
|
@ -75,6 +75,15 @@ MODULE_PARM_DESC(malloc_verbose, "Report information about malloc calls on modul
|
||||
static bool malloc_verbose = false;
|
||||
module_param_named(malloc_verbose, malloc_verbose, bool, 0400);
|
||||
|
||||
/* This parameter is used to find the dpy override conf file */
|
||||
#define NVKMS_CONF_FILE_SPECIFIED (nvkms_conf != NULL)
|
||||
|
||||
MODULE_PARM_DESC(config_file,
|
||||
"Path to the nvidia-modeset configuration file "
|
||||
"(default: disabled)");
|
||||
static char *nvkms_conf = NULL;
|
||||
module_param_named(config_file, nvkms_conf, charp, 0400);
|
||||
|
||||
static atomic_t nvkms_alloc_called_count;
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void)
|
||||
@ -1362,6 +1371,117 @@ static void nvkms_proc_exit(void)
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
* NVKMS Config File Read
|
||||
************************************************************************/
|
||||
static NvBool nvkms_fs_mounted(void)
|
||||
{
|
||||
return current->fs != NULL;
|
||||
}
|
||||
|
||||
static size_t nvkms_config_file_open
|
||||
(
|
||||
char *fname,
|
||||
char ** const buff
|
||||
)
|
||||
{
|
||||
int i = 0;
|
||||
struct file *file;
|
||||
struct inode *file_inode;
|
||||
size_t file_size = 0;
|
||||
size_t read_size = 0;
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
loff_t pos = 0;
|
||||
#endif
|
||||
|
||||
if (!nvkms_fs_mounted()) {
|
||||
printk(KERN_ERR NVKMS_LOG_PREFIX "ERROR: Filesystems not mounted\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
file = filp_open(fname, O_RDONLY, 0);
|
||||
if (file == NULL || IS_ERR(file)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to open %s\n",
|
||||
fname);
|
||||
return 0;
|
||||
}
|
||||
|
||||
file_inode = file->f_inode;
|
||||
if (file_inode == NULL || IS_ERR(file_inode)) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Inode is invalid\n");
|
||||
goto done;
|
||||
}
|
||||
file_size = file_inode->i_size;
|
||||
if (file_size > NVKMS_READ_FILE_MAX_SIZE) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: File exceeds maximum size\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
*buff = nvkms_alloc(file_size, NV_FALSE);
|
||||
if (*buff == NULL) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Out of memory\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Once we have access to GPL symbols, this can be replaced with
|
||||
* kernel_read_file for kernels >= 4.6
|
||||
*/
|
||||
while ((read_size < file_size) && (i++ < NVKMS_READ_FILE_MAX_LOOPS)) {
|
||||
#if defined(NV_KERNEL_READ_HAS_POINTER_POS_ARG)
|
||||
ssize_t ret = kernel_read(file, *buff + read_size,
|
||||
file_size - read_size, &pos);
|
||||
#else
|
||||
ssize_t ret = kernel_read(file, read_size,
|
||||
*buff + read_size,
|
||||
file_size - read_size);
|
||||
#endif
|
||||
if (ret <= 0) {
|
||||
break;
|
||||
}
|
||||
read_size += ret;
|
||||
}
|
||||
|
||||
if (read_size != file_size) {
|
||||
printk(KERN_WARNING NVKMS_LOG_PREFIX "WARNING: Failed to read %s\n",
|
||||
fname);
|
||||
goto done;
|
||||
}
|
||||
|
||||
filp_close(file, current->files);
|
||||
return file_size;
|
||||
|
||||
done:
|
||||
nvkms_free(*buff, file_size);
|
||||
filp_close(file, current->files);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* must be called with nvkms_lock locked */
|
||||
static void nvkms_read_config_file_locked(void)
|
||||
{
|
||||
char *buffer = NULL;
|
||||
size_t buf_size = 0;
|
||||
|
||||
/* only read the config file if the kernel parameter is set */
|
||||
if (!NVKMS_CONF_FILE_SPECIFIED) {
|
||||
return;
|
||||
}
|
||||
|
||||
buf_size = nvkms_config_file_open(nvkms_conf, &buffer);
|
||||
|
||||
if (buf_size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvKmsReadConf(buffer, buf_size, nvkms_config_file_open)) {
|
||||
printk(KERN_INFO NVKMS_LOG_PREFIX "Successfully read %s\n",
|
||||
nvkms_conf);
|
||||
}
|
||||
|
||||
nvkms_free(buffer, buf_size);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
* NVKMS KAPI functions
|
||||
************************************************************************/
|
||||
@ -1533,10 +1653,12 @@ static int __init nvkms_init(void)
|
||||
if (!nvKmsModuleLoad()) {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
up(&nvkms_lock);
|
||||
if (ret != 0) {
|
||||
up(&nvkms_lock);
|
||||
goto fail_module_load;
|
||||
}
|
||||
nvkms_read_config_file_locked();
|
||||
up(&nvkms_lock);
|
||||
|
||||
nvkms_proc_init();
|
||||
|
||||
|
@ -43,14 +43,9 @@ enum NvKmsSyncPtOp {
|
||||
NVKMS_SYNCPT_OP_ALLOC,
|
||||
NVKMS_SYNCPT_OP_GET,
|
||||
NVKMS_SYNCPT_OP_PUT,
|
||||
NVKMS_SYNCPT_OP_INCR_MAX,
|
||||
NVKMS_SYNCPT_OP_CPU_INCR,
|
||||
NVKMS_SYNCPT_OP_FD_TO_ID_AND_THRESH,
|
||||
NVKMS_SYNCPT_OP_ID_AND_THRESH_TO_FD,
|
||||
NVKMS_SYNCPT_OP_READ_MINVAL,
|
||||
NVKMS_SYNCPT_OP_READ_MAXVAL,
|
||||
NVKMS_SYNCPT_OP_SET_MIN_EQ_MAX,
|
||||
NVKMS_SYNCPT_OP_SET_MAXVAL,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@ -60,24 +55,10 @@ typedef struct {
|
||||
NvU32 id; /* out */
|
||||
} alloc;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} get;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} put;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 incr; /* in */
|
||||
NvU32 value; /* out */
|
||||
} incr_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} cpu_incr;
|
||||
|
||||
struct {
|
||||
NvS32 fd; /* in */
|
||||
NvU32 id; /* out */
|
||||
@ -94,20 +75,6 @@ typedef struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 minval; /* out */
|
||||
} read_minval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 maxval; /* out */
|
||||
} read_maxval;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
} set_min_eq_max;
|
||||
|
||||
struct {
|
||||
NvU32 id; /* in */
|
||||
NvU32 val; /* in */
|
||||
} set_maxval;
|
||||
} NvKmsSyncPtOpParams;
|
||||
|
||||
NvBool nvkms_output_rounding_fix(void);
|
||||
|
@ -42,6 +42,20 @@ typedef void nvkms_procfs_proc_t(void *data,
|
||||
char *buffer, size_t size,
|
||||
nvkms_procfs_out_string_func_t *outString);
|
||||
|
||||
/* max number of loops to prevent hanging the kernel if an edge case is hit */
|
||||
#define NVKMS_READ_FILE_MAX_LOOPS 1000
|
||||
/* max size for any file read by the config system */
|
||||
#define NVKMS_READ_FILE_MAX_SIZE 8192
|
||||
|
||||
/*
|
||||
* The read file callback should allocate a buffer pointed to by *buff, fill it
|
||||
* with the contents of fname, and return the size of the buffer. Buffer is not
|
||||
* guaranteed to be null-terminated. The caller is responsible for freeing the
|
||||
* buffer with nvkms_free, not nvFree.
|
||||
*/
|
||||
typedef size_t nvkms_config_read_file_func_t(char *fname,
|
||||
char ** const buff);
|
||||
|
||||
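As a rough illustration of this contract only, a userspace-style callback could look like the sketch below. It substitutes stdio and malloc for the filp_open/kernel_read and nvkms_alloc calls the in-kernel implementation uses, so the function name and allocator are assumptions for the example, not driver code:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative implementation of the nvkms_config_read_file_func_t contract:
 * allocate *buff, fill it with the contents of fname, and return the number
 * of bytes placed in the buffer, or 0 on failure. */
static size_t example_read_file(char *fname, char ** const buff)
{
    FILE *f = fopen(fname, "rb");
    long size;
    size_t nread = 0;

    *buff = NULL;

    if (f == NULL)
        return 0;

    if (fseek(f, 0, SEEK_END) == 0 && (size = ftell(f)) > 0 &&
        fseek(f, 0, SEEK_SET) == 0) {
        *buff = malloc((size_t)size);
        if (*buff != NULL) {
            nread = fread(*buff, 1, (size_t)size, f);
            if (nread != (size_t)size) {  /* short read: report failure */
                free(*buff);
                *buff = NULL;
                nread = 0;
            }
        }
    }

    fclose(f);
    return nread;
}
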
typedef struct {
|
||||
const char *name;
|
||||
nvkms_procfs_proc_t *func;
|
||||
@ -74,6 +88,9 @@ void nvKmsResume(NvU32 gpuId);
|
||||
|
||||
void nvKmsGetProcFiles(const nvkms_procfs_file_t **ppProcFiles);
|
||||
|
||||
NvBool nvKmsReadConf(const char *buff, size_t size,
|
||||
nvkms_config_read_file_func_t readfile);
|
||||
|
||||
void nvKmsKapiHandleEventQueueChange
|
||||
(
|
||||
struct NvKmsKapiDevice *device
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -69,6 +69,9 @@ extern "C" {
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE 25:25
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
|
||||
#define NVC3B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2013-2021 NVIDIA Corporation
|
||||
Copyright (c) 2013-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
|
@ -1,12 +1,6 @@
|
||||
NVIDIA_UVM_SOURCES ?=
|
||||
NVIDIA_UVM_SOURCES_CXX ?=
|
||||
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c
|
||||
@ -58,6 +52,7 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_pascal_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
|
||||
@ -72,6 +67,12 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ampere_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_policy.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_perf_utils.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_kvmalloc.c
|
||||
@ -94,7 +95,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_test_rng.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_tree_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_range_allocator_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_gpu_semaphore_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hmm_sanity_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_rm_mem_test.c
|
||||
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_page_tree_test.c
|
||||
|
@ -36,7 +36,7 @@ NVIDIA_UVM_KO = nvidia-uvm/nvidia-uvm.ko
|
||||
#
|
||||
|
||||
ifeq ($(UVM_BUILD_TYPE),debug)
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG $(call cc-option,-Og,-O0) -g
|
||||
NVIDIA_UVM_CFLAGS += -DDEBUG -O1 -g
|
||||
else
|
||||
ifeq ($(UVM_BUILD_TYPE),develop)
|
||||
# -DDEBUG is required, in order to allow pr_devel() print statements to
|
||||
@ -81,8 +81,10 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
@ -100,6 +102,6 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_vma_added_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += make_device_exclusive_range
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += migrate_device_range
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
|
@ -35,26 +35,95 @@
|
||||
#include "uvm_linux_ioctl.h"
|
||||
#include "uvm_hmm.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
|
||||
#define NVIDIA_UVM_DEVICE_NAME "nvidia-uvm"
|
||||
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
static const struct file_operations uvm_fops;
|
||||
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
if (status == NV_OK) {
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val)
|
||||
{
|
||||
unsigned long uptr;
|
||||
uvm_fd_type_t type;
|
||||
void *ptr;
|
||||
|
||||
status = uvm_va_space_create(inode, filp);
|
||||
UVM_ASSERT(uvm_file_is_nvidia_uvm(filp));
|
||||
|
||||
uvm_up_read(&g_uvm_global.pm.lock);
|
||||
uptr = atomic_long_read_acquire((atomic_long_t *) (&filp->private_data));
|
||||
type = (uvm_fd_type_t)(uptr & UVM_FD_TYPE_MASK);
|
||||
ptr = (void *)(uptr & ~UVM_FD_TYPE_MASK);
|
||||
BUILD_BUG_ON(UVM_FD_COUNT > UVM_FD_TYPE_MASK + 1);
|
||||
|
||||
switch (type) {
|
||||
case UVM_FD_UNINITIALIZED:
|
||||
case UVM_FD_INITIALIZING:
|
||||
UVM_ASSERT(!ptr);
|
||||
break;
|
||||
|
||||
case UVM_FD_VA_SPACE:
|
||||
UVM_ASSERT(ptr);
|
||||
BUILD_BUG_ON(__alignof__(uvm_va_space_t) < (1UL << UVM_FD_TYPE_BITS));
|
||||
break;
|
||||
|
||||
default:
|
||||
UVM_ASSERT(0);
|
||||
}
|
||||
|
||||
if (ptr_val)
|
||||
*ptr_val = ptr;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
// Called when opening /dev/nvidia-uvm. This code doesn't take any UVM locks, so
|
||||
// there's no need to acquire g_uvm_global.pm.lock, but if that changes the PM
|
||||
// lock will need to be taken.
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
mapping = uvm_kvmalloc(sizeof(*mapping));
|
||||
if (!mapping)
|
||||
return -ENOMEM;
|
||||
|
||||
// By default all struct files on the same inode share the same
|
||||
// address_space structure (the inode's) across all processes. This means
|
||||
// unmap_mapping_range would unmap virtual mappings across all processes on
|
||||
// that inode.
|
||||
//
|
||||
// Since the UVM driver uses the mapping offset as the VA of the file's
|
||||
// process, we need to isolate the mappings to each process.
|
||||
address_space_init_once(mapping);
|
||||
mapping->host = inode;
|
||||
|
||||
// Some paths in the kernel, for example force_page_cache_readahead which
|
||||
// can be invoked from user-space via madvise MADV_WILLNEED and fadvise
|
||||
// POSIX_FADV_WILLNEED, check the function pointers within
|
||||
// file->f_mapping->a_ops for validity. However, those paths assume that a_ops
|
||||
// itself is always valid. Handle that by using the inode's a_ops pointer,
|
||||
// which is what f_mapping->a_ops would point to anyway if we weren't re-
|
||||
// assigning f_mapping.
|
||||
mapping->a_ops = inode->i_mapping->a_ops;
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO)
|
||||
mapping->backing_dev_info = inode->i_mapping->backing_dev_info;
|
||||
#endif
|
||||
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = mapping;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int uvm_open_entry(struct inode *inode, struct file *filp)
|
||||
@ -80,9 +149,18 @@ static void uvm_release_deferred(void *data)
|
||||
|
||||
static int uvm_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_fd_type_t fd_type;
|
||||
int ret;
|
||||
|
||||
fd_type = uvm_fd_type(filp, (void **)&va_space);
|
||||
UVM_ASSERT(fd_type != UVM_FD_INITIALIZING);
|
||||
if (fd_type == UVM_FD_UNINITIALIZED) {
|
||||
uvm_kvfree(filp->f_mapping);
|
||||
return 0;
|
||||
}
|
||||
|
||||
UVM_ASSERT(fd_type == UVM_FD_VA_SPACE);
|
||||
filp->private_data = NULL;
|
||||
filp->f_mapping = NULL;
|
||||
|
||||
@ -100,7 +178,7 @@ static int uvm_release(struct inode *inode, struct file *filp)
|
||||
// been destroyed, and va_space->mapping won't be used again. Still,
|
||||
// the va_space survives the inode if its destruction is deferred, in
|
||||
// which case the references are rendered stale.
|
||||
address_space_init_once(&va_space->mapping);
|
||||
address_space_init_once(va_space->mapping);
|
||||
|
||||
nv_kthread_q_item_init(&va_space->deferred_release_q_item, uvm_release_deferred, va_space);
|
||||
ret = nv_kthread_q_schedule_q_item(&g_uvm_global.deferred_release_q, &va_space->deferred_release_q_item);
|
||||
@ -363,14 +441,12 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
|
||||
static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_processor_id_t gpu_id;
|
||||
bool make_zombie = false;
|
||||
|
||||
if (current->mm != NULL)
|
||||
uvm_record_lock_mmap_lock_write(current->mm);
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
// current->mm will be NULL on process teardown, in which case we have
|
||||
// special handling.
|
||||
if (current->mm == NULL) {
|
||||
@ -400,13 +476,11 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
|
||||
|
||||
uvm_destroy_vma_managed(vma, make_zombie);
|
||||
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to avoid finding stale entries
|
||||
// that can be attributed to new VA ranges reallocated at the same address
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
|
||||
gpu_va_space->needs_fault_buffer_flush = true;
|
||||
// Notify GPU address spaces that the fault buffer needs to be flushed to
|
||||
// avoid finding stale entries that can be attributed to new VA ranges
|
||||
// reallocated at the same address.
|
||||
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
|
||||
}
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
@ -556,7 +630,7 @@ static struct vm_operations_struct uvm_vm_ops_semaphore_pool =
|
||||
|
||||
static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_va_space_t *va_space;
|
||||
uvm_va_range_t *va_range;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
int ret = 0;
|
||||
@ -565,8 +639,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
if (status != NV_OK)
|
||||
return -nv_status_to_errno(status);
|
||||
|
||||
status = uvm_va_space_initialized(va_space);
|
||||
if (status != NV_OK)
|
||||
va_space = uvm_fd_va_space(filp);
|
||||
if (!va_space)
|
||||
return -EBADFD;
|
||||
|
||||
// When the VA space is associated with an mm, all vmas under the VA space
|
||||
@ -618,7 +692,11 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
// Using VM_DONTCOPY would be nice, but madvise(MADV_DOFORK) can reset that
|
||||
// so we have to handle vm_open on fork anyway. We could disable MADV_DOFORK
|
||||
// with VM_IO, but that causes other mapping issues.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
|
||||
// Make the default behavior be VM_DONTCOPY to avoid the performance impact
|
||||
// of removing CPU mappings in the parent on fork()+exec(). Users can call
|
||||
// madvise(MDV_DOFORK) if the child process requires access to the
|
||||
// allocation.
|
||||
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTCOPY;
|
||||
|
||||
vma->vm_ops = &uvm_vm_ops_managed;
|
||||
|
||||
@@ -685,7 +763,53 @@ static int uvm_mmap_entry(struct file *filp, struct vm_area_struct *vma)

static NV_STATUS uvm_api_initialize(UVM_INITIALIZE_PARAMS *params, struct file *filp)
{
    return uvm_va_space_initialize(uvm_va_space_get(filp), params->flags);
    uvm_va_space_t *va_space;
    NV_STATUS status;
    uvm_fd_type_t old_fd_type;

    // Normally we expect private_data == UVM_FD_UNINITIALIZED. However multiple
    // threads may call this ioctl concurrently so we have to be careful to
    // avoid initializing multiple va_spaces and/or leaking memory. To do this
    // we do an atomic compare and swap. Only one thread will observe
    // UVM_FD_UNINITIALIZED and that thread will allocate and setup the
    // va_space.
    //
    // Other threads will either see UVM_FD_INITIALIZING or UVM_FD_VA_SPACE. In
    // the case of UVM_FD_VA_SPACE we return success if and only if the
    // initialization flags match. If another thread is still initializing the
    // va_space we return NV_ERR_BUSY_RETRY.
    //
    // If va_space initialization fails we return the failure code and reset the
    // FD state back to UVM_FD_UNINITIALIZED to allow another initialization
    // attempt to be made. This is safe because other threads will have only had
    // a chance to observe UVM_FD_INITIALIZING and not UVM_FD_VA_SPACE in this
    // case.
    old_fd_type = nv_atomic_long_cmpxchg((atomic_long_t *)&filp->private_data,
                                         UVM_FD_UNINITIALIZED, UVM_FD_INITIALIZING);
    old_fd_type &= UVM_FD_TYPE_MASK;
    if (old_fd_type == UVM_FD_UNINITIALIZED) {
        status = uvm_va_space_create(filp->f_mapping, &va_space, params->flags);
        if (status != NV_OK) {
            atomic_long_set_release((atomic_long_t *)&filp->private_data, UVM_FD_UNINITIALIZED);
            return status;
        }

        atomic_long_set_release((atomic_long_t *)&filp->private_data, (long)va_space | UVM_FD_VA_SPACE);
    }
    else if (old_fd_type == UVM_FD_VA_SPACE) {
        va_space = uvm_va_space_get(filp);

        if (params->flags != va_space->initialization_flags)
            status = NV_ERR_INVALID_ARGUMENT;
        else
            status = NV_OK;
    }
    else {
        UVM_ASSERT(old_fd_type == UVM_FD_INITIALIZING);
        status = NV_ERR_BUSY_RETRY;
    }

    return status;
}

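The comment above describes a lock-free, one-shot initialization protocol: the fd state lives in filp->private_data, whose low bits act as a type tag on an otherwise aligned pointer, and only the thread that wins the UNINITIALIZED to INITIALIZING compare-and-swap creates the va_space. A minimal standalone sketch of that pattern in plain C11 follows; all names and the allocation stand-in are illustrative, not driver code:

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

#define FD_UNINITIALIZED 0UL
#define FD_INITIALIZING  1UL
#define FD_VA_SPACE      2UL
#define FD_TYPE_MASK     3UL  /* two tag bits; the payload pointer must be 4-byte aligned */

static _Atomic uintptr_t fd_state = FD_UNINITIALIZED;

/* Returns 0 on success, 1 if another thread is still initializing, -1 on
 * allocation failure. aligned_alloc() stands in for uvm_va_space_create(). */
static int initialize_once(void)
{
    uintptr_t expected = FD_UNINITIALIZED;

    if (atomic_compare_exchange_strong(&fd_state, &expected, FD_INITIALIZING)) {
        void *obj = aligned_alloc(4, 64);
        if (obj == NULL) {
            /* Reset so a later caller can retry the initialization. */
            atomic_store(&fd_state, FD_UNINITIALIZED);
            return -1;
        }
        /* Publish the pointer with its type tag in the low bits. */
        atomic_store(&fd_state, (uintptr_t)obj | FD_VA_SPACE);
        return 0;
    }

    /* Losing threads see either the INITIALIZING marker or a tagged pointer. */
    return ((expected & FD_TYPE_MASK) == FD_VA_SPACE) ? 0 : 1;
}
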
static NV_STATUS uvm_api_pageable_mem_access(UVM_PAGEABLE_MEM_ACCESS_PARAMS *params, struct file *filp)
|
||||
@ -782,11 +906,6 @@ static const struct file_operations uvm_fops =
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
{
|
||||
return (filp != NULL) && (filp->f_op == &uvm_fops);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
|
@ -1746,17 +1746,20 @@ NV_STATUS UvmCreateExternalRange(void *base,
|
||||
// GPUs. The external allocation can be unmapped from a specific GPU using
|
||||
// UvmUnmapExternal or from all GPUs using UvmFree.
|
||||
//
|
||||
// The virtual address range specified by (base, length) must be aligned to the
|
||||
// allocation's physical page size and must fall within a VA range previously
|
||||
// created with UvmCreateExternalRange. A GPU VA space must have been registered
|
||||
// for each GPU in the list. The offset in the physical allocation at which the
|
||||
// allocation must be mapped should also be aligned to the allocation's physical
|
||||
// page size. The (base, length) range must lie within the largest possible
|
||||
// virtual address supported by the specified GPUs.
|
||||
// The virtual address range specified by (base, length) must fall within a VA
|
||||
// range previously created with UvmCreateExternalRange. A GPU VA space must
|
||||
// have been registered for each GPU in the list. The (base, length) range must
|
||||
// lie within the largest possible virtual address supported by the specified
|
||||
// GPUs.
|
||||
//
|
||||
// The page size used for the mapping is the largest supported page size less
|
||||
// than or equal to the alignments of base, length, offset, and the allocation
|
||||
// page size.
|
||||
//
|
||||
// If the range specified by (base, length) falls within any existing mappings,
|
||||
// the behavior is the same as if UvmUnmapExternal with the range specified by
|
||||
// (base, length) had been called first.
|
||||
// (base, length) had been called first, provided that base and length are
|
||||
// aligned to the page size used for the existing one.
|
||||
//
|
||||
// If the allocation resides in GPU memory, that GPU must have been registered
|
||||
// via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
|
||||
@ -1838,8 +1841,9 @@ NV_STATUS UvmCreateExternalRange(void *base,
//           - The requested address range does not fall entirely within an
//             existing external VA range created with a single call to
//             UvmCreateExternalRange.
//           - At least one of base and length is not aligned to the allocation's
//             physical page size.
//           - The mapping page size allowed by the alignments of base, length,
//             and offset is smaller than the minimum supported page size on the
//             GPU.
//           - base or base + length fall within an existing mapping but are not
//             aligned to that mapping's page size.
//
@ -1848,8 +1852,7 @@ NV_STATUS UvmCreateExternalRange(void *base,
//         address supported by one or more of the specified GPUs.
//
//     NV_ERR_INVALID_OFFSET:
//         offset is not aligned to the allocation's physical page size or
//         offset+length exceeds the allocation size.
//         - offset+length exceeds the allocation size.
//
//     NV_ERR_INVALID_DEVICE:
//         One of the following occurred:
@ -3758,6 +3761,7 @@ NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
//     NV_ERR_INVALID_ARGUMENT:
//         Read spans more than a single target process allocation.
//
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
                                    void *buffer,

@ -27,7 +27,7 @@
|
||||
#include "clc7b5.h"
|
||||
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
|
||||
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
@ -112,7 +112,7 @@ NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
|
||||
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
NvU64 push_begin_gpu_va;
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
@ -183,7 +183,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
|
@ -29,7 +29,7 @@
|
||||
#include "clc56f.h"
|
||||
#include "clc076.h"
|
||||
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
@ -82,7 +82,7 @@ bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
if (!uvm_channel_is_proxy(push->channel))
|
||||
return true;
|
||||
|
@ -25,6 +25,7 @@
|
||||
#define __UVM_API_H__
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_ioctl.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_lock.h"
|
||||
@ -51,8 +52,10 @@
|
||||
\
|
||||
params.rmStatus = uvm_global_get_status(); \
|
||||
if (params.rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params.rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params.rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params.rmStatus == NV_OK)) \
|
||||
params.rmStatus = function_name(¶ms, filp); \
|
||||
} \
|
||||
@ -88,8 +91,10 @@
|
||||
\
|
||||
params->rmStatus = uvm_global_get_status(); \
|
||||
if (params->rmStatus == NV_OK) { \
|
||||
if (do_init_check) \
|
||||
params->rmStatus = uvm_va_space_initialized(uvm_va_space_get(filp)); \
|
||||
if (do_init_check) { \
|
||||
if (!uvm_fd_va_space(filp)) \
|
||||
params->rmStatus = NV_ERR_ILLEGAL_ACTION; \
|
||||
} \
|
||||
if (likely(params->rmStatus == NV_OK)) \
|
||||
params->rmStatus = function_name(params, filp); \
|
||||
} \
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_tracker.h"
|
||||
@ -655,8 +656,10 @@ static NV_STATUS test_ce(uvm_va_space_t *va_space, bool skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_memcpy_and_memset(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_reduction_inc(gpu));
|
||||
TEST_NV_CHECK_RET(test_semaphore_release(gpu));
|
||||
|
||||
if (!skipTimestampTest)
|
||||
TEST_NV_CHECK_RET(test_semaphore_timestamp(gpu));
|
||||
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
@ -77,7 +77,7 @@ static void channel_pool_lock_init(uvm_channel_pool_t *pool)
|
||||
uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
|
||||
}
|
||||
|
||||
void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
|
||||
static void channel_pool_lock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_lock(&pool->mutex);
|
||||
@ -85,7 +85,7 @@ void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
|
||||
uvm_spin_lock(&pool->spinlock);
|
||||
}
|
||||
|
||||
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
|
||||
static void channel_pool_unlock(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_mutex_unlock(&pool->mutex);
|
||||
@ -93,14 +93,6 @@ void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
|
||||
uvm_spin_unlock(&pool->spinlock);
|
||||
}
|
||||
|
||||
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
|
||||
{
|
||||
if (uvm_channel_pool_is_proxy(pool))
|
||||
uvm_assert_mutex_locked(&pool->mutex);
|
||||
else
|
||||
uvm_assert_spinlock_locked(&pool->spinlock);
|
||||
}
|
||||
|
||||
// Update channel progress, completing up to max_to_complete entries
|
||||
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 max_to_complete,
|
||||
@ -113,12 +105,14 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
// Completed value should never exceed the queued value
|
||||
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
|
||||
"GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
|
||||
channel->pool->manager->gpu->parent->name, channel->name, completed_value,
|
||||
channel->pool->manager->gpu->parent->name,
|
||||
channel->name,
|
||||
completed_value,
|
||||
channel->tracking_sem.queued_value);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
@ -141,7 +135,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
channel->gpu_get = gpu_get;
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
if (cpu_put >= gpu_get)
|
||||
pending_gpfifos = cpu_put - gpu_get;
|
||||
@ -154,7 +148,8 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 uvm_channel_update_progress(uvm_channel_t *channel)
|
||||
{
|
||||
// By default, don't complete too many entries at a time to spread the cost
|
||||
// of doing so across callers and avoid holding a spin lock for too long.
|
||||
// of doing so across callers and avoid potentially holding a spin lock for
|
||||
// too long.
|
||||
return uvm_channel_update_progress_with_max(channel, 8, UVM_CHANNEL_UPDATE_MODE_COMPLETED);
|
||||
}
|
||||
|
||||
@ -186,71 +181,96 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
|
||||
return pending_gpfifos;
|
||||
}
|
||||
|
||||
static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
static NvU32 channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
NvU32 available = channel->num_gpfifo_entries;
|
||||
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
// Remove sentinel entry
|
||||
available -= 1;
|
||||
|
||||
return (pending_entries + channel->current_gpfifo_count + num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
// Remove entries of ongoing pushes
|
||||
available -= channel->current_gpfifo_count;
|
||||
|
||||
// Remove pending entries
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
available -= (channel->cpu_put - channel->gpu_get);
|
||||
else
|
||||
available -= (channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get);
|
||||
|
||||
UVM_ASSERT(available < channel->num_gpfifo_entries);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 available;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
available = channel_get_available_gpfifo_entries(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return available;
|
||||
}
|
||||
|
||||
static bool try_claim_channel_locked(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed = false;
|
||||
|
||||
UVM_ASSERT(num_gpfifo_entries > 0);
|
||||
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (channel_is_available(channel, num_gpfifo_entries)) {
|
||||
if (channel_get_available_gpfifo_entries(channel) >= num_gpfifo_entries) {
|
||||
channel->current_gpfifo_count += num_gpfifo_entries;
|
||||
claimed = true;
|
||||
}
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
return claimed;
|
||||
}
|
||||
|
||||
static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
{
|
||||
bool claimed;
|
||||
|
||||
channel_pool_lock(channel->pool);
|
||||
claimed = try_claim_channel_locked(channel, num_gpfifo_entries);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return claimed;
|
||||
}
|
||||
|
||||
static void lock_push(uvm_channel_t *channel)
|
||||
static void unlock_channel_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
}
|
||||
|
||||
static void unlock_push(uvm_channel_t *channel)
|
||||
static bool is_channel_locked_for_push(uvm_channel_t *channel)
|
||||
{
|
||||
}
|
||||
|
||||
static bool trylock_push(uvm_channel_t *channel)
|
||||
{
|
||||
// For CE and proxy channels, we always return that the channel is locked,
// which has no functional impact in the UVM channel code flow; this is only
// used in UVM_ASSERTs.
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reserve a channel in the specified pool
|
||||
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
// Reserve a channel in the specified CE pool
|
||||
static NV_STATUS channel_reserve_in_ce_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_t *channel;
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
UVM_ASSERT(pool);
|
||||
UVM_ASSERT(uvm_channel_pool_is_ce(pool));
|
||||
|
||||
uvm_for_each_channel_in_pool(channel, pool) {
|
||||
// TODO: Bug 1764953: Prefer idle/less busy channels
|
||||
if (trylock_push(channel)) {
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
*channel_out = channel;
|
||||
return NV_OK;
|
||||
}
|
||||
else {
|
||||
unlock_push(channel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uvm_spin_loop_init(&spin);
|
||||
@ -261,7 +281,6 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
if (try_claim_channel(channel, 1)) {
|
||||
lock_push(channel);
|
||||
*channel_out = channel;
|
||||
|
||||
return NV_OK;
|
||||
@ -281,9 +300,12 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
uvm_channel_pool_t *pool = manager->pool_to_use.default_for_type[type];
|
||||
|
||||
UVM_ASSERT(pool != NULL);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
@ -299,7 +321,7 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
|
||||
|
||||
UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
return channel_reserve_in_pool(pool, channel_out);
|
||||
return channel_reserve_in_ce_pool(pool, channel_out);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
|
||||
@ -323,14 +345,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_info_t *push_info;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
|
||||
UVM_ASSERT(push_info != NULL);
|
||||
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
|
||||
list_del(&push_info->available_list_node);
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
@ -345,6 +367,8 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
|
||||
|
||||
manager = channel->pool->manager;
|
||||
|
||||
UVM_ASSERT(is_channel_locked_for_push(channel));
|
||||
|
||||
status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -439,7 +463,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
@ -476,8 +500,8 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
|
||||
// to be called first.
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
unlock_channel_for_push(channel);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
@ -500,7 +524,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
|
||||
@ -534,9 +558,10 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
|
||||
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that.
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
// push must be updated before that. Note that we do not call
|
||||
// unlock_channel_for_push() because a control GPFIFO is followed by a
|
||||
// semaphore release, where the channel is unlocked.
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
// issues on some systems. Comment from CUDA: "fixes throughput-related
|
||||
@ -593,7 +618,7 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
uvm_spin_loop_t spin;
|
||||
|
||||
if (try_claim_channel(channel, num_gpfifo_entries))
|
||||
goto out;
|
||||
return NV_OK;
|
||||
|
||||
uvm_channel_update_progress(channel);
|
||||
|
||||
@ -604,10 +629,6 @@ NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
uvm_channel_update_progress(channel);
|
||||
}
|
||||
|
||||
out:
|
||||
if (status == NV_OK)
|
||||
lock_push(channel);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -621,12 +642,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
if (pending_count == 0)
|
||||
return NULL;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->gpu_get != channel->cpu_put)
|
||||
entry = &channel->gpfifo_entries[channel->gpu_get];
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
channel_pool_unlock(channel->pool);
|
||||
|
||||
return entry;
|
||||
}
|
||||
@ -780,18 +801,17 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
uvm_gpu_t *gpu = manager->gpu;
|
||||
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
}
|
||||
|
||||
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
|
||||
channel_alloc_params.numGpFifoEntries = manager->conf.num_gpfifo_entries;
|
||||
channel_alloc_params.gpFifoLoc = manager->conf.gpfifo_loc;
|
||||
channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
|
||||
channel_alloc_params.engineIndex = engine_index;
|
||||
|
||||
if (uvm_channel_is_ce(channel))
|
||||
if (uvm_channel_is_ce(channel)) {
|
||||
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
|
||||
|
||||
channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
|
||||
}
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
|
||||
&channel_alloc_params,
|
||||
@ -923,7 +943,7 @@ NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaph
|
||||
return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, uvm_channel_is_proxy(access_channel));
|
||||
}
|
||||
|
||||
static NV_STATUS init_channel(uvm_channel_t *channel)
|
||||
static NV_STATUS channel_init(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_t push;
|
||||
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
|
||||
@ -1010,6 +1030,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
channel_pool_lock_init(pool);
|
||||
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
UVM_ASSERT(num_channels <= UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL);
|
||||
|
||||
pool->channels = uvm_kvmalloc_zero(sizeof(*pool->channels) * num_channels);
|
||||
if (!pool->channels)
|
||||
@ -1024,7 +1045,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
status = init_channel(channel);
|
||||
status = channel_init(channel);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
@ -1404,15 +1425,55 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
|
||||
manager->conf.gpput_loc = string_to_buffer_location(gpput_loc_value);
|
||||
}
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as the
|
||||
// preferred CE for any type, because as more information is discovered (for
|
||||
// example, a pair of peer GPUs is added) we may start using the previously idle
|
||||
// channels.
|
||||
// Returns the maximum number of pools that are needed in the current
|
||||
// configuration. The implementation may choose to create a smaller number of
|
||||
// pools.
|
||||
static unsigned channel_manager_get_max_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
unsigned num_channel_pools;
|
||||
unsigned num_used_ce = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
// Create one CE channel pool per usable CE
|
||||
num_channel_pools = num_used_ce;
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
return num_channel_pools;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_ce_pools(uvm_channel_manager_t *manager, unsigned *preferred_ce)
|
||||
{
|
||||
unsigned ce;
|
||||
|
||||
// A pool is created for each usable CE, even if it has not been selected as
|
||||
// the preferred CE for any type, because as more information is discovered
|
||||
// (for example, a pair of peer GPUs is added) we may start using the
|
||||
// previously idle pools.
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
NV_STATUS status;
|
||||
unsigned type;
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_CE_COUNT; type++) {
|
||||
if (preferred_ce[type] == ce)
|
||||
manager->pool_to_use.default_for_type[type] = pool;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
{
|
||||
NV_STATUS status;
|
||||
unsigned ce, type;
|
||||
unsigned num_channel_pools;
|
||||
uvm_channel_type_t type;
|
||||
unsigned max_channel_pools;
|
||||
unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
|
||||
uvm_channel_pool_t *pool = NULL;
|
||||
|
||||
@ -1423,36 +1484,21 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
// CE channel pools
|
||||
num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
max_channel_pools = channel_manager_get_max_pools(manager);
|
||||
|
||||
// CE proxy channel pool.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
|
||||
num_channel_pools++;
|
||||
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
|
||||
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * max_channel_pools);
|
||||
if (!manager->channel_pools)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
|
||||
status = channel_manager_create_ce_pools(manager, preferred_ce);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
// Assign channel types to pools
|
||||
for (type = 0; type < ARRAY_SIZE(preferred_ce); type++) {
|
||||
unsigned ce = preferred_ce[type];
|
||||
|
||||
UVM_ASSERT(test_bit(ce, manager->ce_mask));
|
||||
|
||||
manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
|
||||
}
|
||||
|
||||
// In SR-IOV heavy, add an additional, single-channel, pool that is
|
||||
// dedicated to the MEMOPS type.
|
||||
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
|
||||
uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
|
||||
|
||||
status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE_PROXY, preferred_ce[channel_type], &pool);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -1613,7 +1659,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
|
||||
@ -1625,7 +1671,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
|
||||
@ -1669,7 +1715,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
|
||||
@ -1717,7 +1763,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
}
|
||||
}
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
|
||||
|
@ -50,6 +50,9 @@
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)

// Maximum number of channels per pool.
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL 8

// Semaphore payloads cannot advance too much between calls to
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
// are bound by gpfifo sizing as we have to update the completed value to
@ -61,6 +64,14 @@
// uvm_channel.h includes uvm_gpu_semaphore.h.
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)

#define uvm_channel_pool_assert_locked(pool) (                \
    {                                                          \
        if (uvm_channel_pool_is_proxy(pool))                   \
            uvm_assert_mutex_locked(&(pool)->mutex);           \
        else                                                   \
            uvm_assert_spinlock_locked(&(pool)->spinlock);     \
    })

// Channel types
typedef enum
{
@ -162,7 +173,20 @@ typedef struct
    // Pool type: Refer to the uvm_channel_pool_type_t enum.
    uvm_channel_pool_type_t pool_type;

    // Lock protecting the state of channels in the pool
    // Lock protecting the state of channels in the pool.
    //
    // There are two pool lock types available: spinlock and mutex. The mutex
    // variant is required when the thread holding the pool lock must
    // sleep (ex: acquire another mutex) deeper in the call stack, either in UVM
    // or RM. For example, work submission to proxy channels in SR-IOV heavy
    // entails calling an RM API that acquires a mutex, so the proxy channel
    // pool must use the mutex variant.
    //
    // Unless the mutex is required, the spinlock is preferred. This is because,
    // other than for proxy channels, work submission takes little time and does
    // not involve any RM calls, so UVM can avoid any invocation that may result
    // in a sleep. All non-proxy channel pools use the spinlock variant, even in
    // SR-IOV heavy.
    union {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
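The spinlock-versus-mutex rationale above boils down to a single branch at lock time, which is exactly what the channel_pool_lock()/channel_pool_unlock() helpers in uvm_channel.c do. As a standalone illustration of the pattern, here is a sketch that uses pthread primitives as stand-ins for uvm_spinlock_t/uvm_mutex_t; it is not the driver's implementation.

    #include <pthread.h>
    #include <stdbool.h>

    // Stand-in types: pthread primitives instead of uvm_mutex_t/uvm_spinlock_t.
    typedef struct {
        bool uses_mutex;          // true for pools that may sleep while locked
        union {
            pthread_mutex_t mutex;
            pthread_spinlock_t spinlock;
        } u;
    } pool_lock_t;

    static void pool_lock(pool_lock_t *l)
    {
        if (l->uses_mutex)
            pthread_mutex_lock(&l->u.mutex);
        else
            pthread_spin_lock(&l->u.spinlock);
    }

    static void pool_unlock(pool_lock_t *l)
    {
        if (l->uses_mutex)
            pthread_mutex_unlock(&l->u.mutex);
        else
            pthread_spin_unlock(&l->u.spinlock);
    }

Keeping both flavors behind one interface lets callers stay oblivious to which pool type they are operating on, while the sleeping (proxy) path still satisfies the no-sleep-under-spinlock rule.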
@ -275,7 +299,7 @@ struct uvm_channel_manager_struct
|
||||
unsigned num_channel_pools;
|
||||
|
||||
// Mask containing the indexes of the usable Copy Engines. Each usable CE
|
||||
// has a pool associated with it, see channel_manager_ce_pool
|
||||
// has at least one pool associated with it.
|
||||
DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);
|
||||
|
||||
struct
|
||||
@ -313,10 +337,6 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
void uvm_channel_pool_lock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
@ -329,10 +349,16 @@ static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
|
||||
return uvm_channel_pool_is_ce(channel->pool);
|
||||
}
|
||||
|
||||
// Proxy channels are used to push page tree related methods, so their channel
|
||||
@ -449,6 +475,10 @@ NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_
|
||||
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
|
||||
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);
|
||||
|
||||
// Returns the number of available GPFIFO entries. The function internally
|
||||
// acquires the channel pool lock.
|
||||
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);
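As a worked example of the arithmetic behind this helper: with num_gpfifo_entries = 8, cpu_put = 6, gpu_get = 3 and two entries claimed by in-flight pushes, the pending count is 3, so 8 - 1 (sentinel) - 2 (claimed) - 3 (pending) = 2 entries remain available. A standalone sketch of the same computation, assuming the caller already holds the pool lock (hypothetical helper, not the driver's own):

    #include <assert.h>
    #include <stdint.h>

    // Sketch of the free-slot computation for a GPFIFO-style ring buffer.
    // One entry is reserved as a sentinel so that cpu_put == gpu_get always
    // means "empty" rather than "full".
    static uint32_t ring_available_entries(uint32_t num_entries,
                                           uint32_t cpu_put,
                                           uint32_t gpu_get,
                                           uint32_t claimed_by_pushes)
    {
        uint32_t pending;
        uint32_t available = num_entries - 1; // sentinel entry

        if (cpu_put >= gpu_get)
            pending = cpu_put - gpu_get;
        else
            pending = cpu_put + num_entries - gpu_get;

        available -= claimed_by_pushes;
        available -= pending;

        assert(available < num_entries);
        return available;
    }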
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel);
|
||||
|
||||
static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
|
||||
|
@ -153,7 +153,6 @@ done:
|
||||
|
||||
static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
@ -168,11 +167,12 @@ static NV_STATUS test_unexpected_completed_values(uvm_va_space_t *va_space)
|
||||
completed_value = uvm_channel_update_completed_value(channel);
|
||||
uvm_gpu_semaphore_set_payload(&channel->tracking_sem.semaphore, (NvU32)completed_value + 1);
|
||||
|
||||
TEST_CHECK_RET(uvm_global_get_status() == NV_OK);
|
||||
TEST_NV_CHECK_RET(uvm_global_get_status());
|
||||
uvm_channel_update_progress_all(channel);
|
||||
TEST_CHECK_RET(uvm_global_reset_fatal_error() == NV_ERR_INVALID_STATE);
|
||||
|
||||
uvm_channel_manager_destroy(gpu->channel_manager);
|
||||
|
||||
// Destruction will hit the error again, so clear one more time.
|
||||
uvm_global_reset_fatal_error();
|
||||
|
||||
@ -743,22 +743,6 @@ NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return channel->num_gpfifo_entries - pending_entries - 1;
|
||||
}
|
||||
|
||||
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -771,9 +755,10 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
NvU64 entry;
|
||||
uvm_push_t push;
|
||||
|
||||
gpu = uvm_va_space_find_first_gpu(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_t *manager = gpu->channel_manager;
|
||||
gpu = manager->gpu;
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
|
||||
cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
|
||||
@ -791,6 +776,12 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
|
||||
uvm_push_end(&push);
|
||||
|
||||
// Flush all completed entries from the GPFIFO ring buffer. This test
|
||||
// requires this flush because we verify (below with
|
||||
// uvm_channel_get_available_gpfifo_entries) the number of free entries
|
||||
// in the channel.
|
||||
uvm_channel_update_progress_all(channel);
|
||||
|
||||
// Populate the remaining GPFIFO entries, leaving 2 slots available.
|
||||
// 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
|
||||
// indicate a terminal condition for the GPFIFO ringbuffer, therefore we
|
||||
@ -800,7 +791,7 @@ NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
|
||||
uvm_push_end(&push);
|
||||
}
|
||||
|
||||
TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);
|
||||
TEST_CHECK_GOTO(uvm_channel_get_available_gpfifo_entries(channel) == 2, error);
|
||||
|
||||
// We should have room for the control GPFIFO and the subsequent
|
||||
// semaphore release.
|
||||
@ -936,7 +927,7 @@ done:
|
||||
static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
const UVM_TEST_CHANNEL_STRESS_PARAMS *params)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (params->iterations == 0 || params->num_streams == 0)
|
||||
return NV_ERR_INVALID_PARAMETER;
|
||||
@ -951,10 +942,7 @@ static NV_STATUS uvm_test_channel_stress_stream(uvm_va_space_t *va_space,
|
||||
params->iterations,
|
||||
params->seed,
|
||||
params->verbose);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
|
||||
|
@ -347,6 +347,21 @@ typedef struct
|
||||
NvHandle user_object;
|
||||
} uvm_rm_user_object_t;
|
||||
|
||||
typedef enum
{
    UVM_FD_UNINITIALIZED,
    UVM_FD_INITIALIZING,
    UVM_FD_VA_SPACE,
    UVM_FD_COUNT
} uvm_fd_type_t;

// This should be large enough to fit the valid values from uvm_fd_type_t above.
// Note we can't use order_base_2(UVM_FD_COUNT) to define this because our code
// coverage tool fails when the preprocessor expands that to a huge mess of
// ternary operators.
#define UVM_FD_TYPE_BITS 2
#define UVM_FD_TYPE_MASK ((1UL << UVM_FD_TYPE_BITS) - 1)

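Because uvm_fd_type_t has fewer than four valid values, the type can ride in the two low bits of a suitably aligned pointer, which is what UVM_FD_TYPE_BITS and UVM_FD_TYPE_MASK enable. The following self-contained sketch shows the encode/decode halves of that tagged-pointer scheme; the helper names are hypothetical and are not the driver's own.

    #include <stdint.h>

    #define FD_TYPE_BITS 2
    #define FD_TYPE_MASK ((1UL << FD_TYPE_BITS) - 1)

    // Pack a pointer and a small type tag into one word. The pointer must be
    // at least 4-byte aligned so its two low bits are guaranteed to be zero.
    static inline uintptr_t fd_pack(void *ptr, unsigned type)
    {
        return (uintptr_t)ptr | (type & FD_TYPE_MASK);
    }

    // Unpack: return the tag and, optionally, the original pointer.
    static inline unsigned fd_unpack(uintptr_t word, void **ptr_out)
    {
        if (ptr_out)
            *ptr_out = (void *)(word & ~FD_TYPE_MASK);
        return (unsigned)(word & FD_TYPE_MASK);
    }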
// Macro used to compare two values for types that support the less-than
// operator. It returns -1 if a < b, 1 if a > b and 0 if a == b.
#define UVM_CMP_DEFAULT(a,b) \
@ -369,6 +384,10 @@ typedef struct
// file. A NULL input returns false.
bool uvm_file_is_nvidia_uvm(struct file *filp);

// Returns the type of data that filp->private_data contains and, if ptr_val !=
// NULL, also returns the value of the pointer.
uvm_fd_type_t uvm_fd_type(struct file *filp, void **ptr_val);

// Reads the first word in the supplied struct page.
static inline void uvm_touch_page(struct page *page)
{

@ -28,6 +28,8 @@ typedef struct uvm_global_struct uvm_global_t;
|
||||
|
||||
typedef struct uvm_gpu_struct uvm_gpu_t;
|
||||
typedef struct uvm_parent_gpu_struct uvm_parent_gpu_t;
|
||||
typedef struct uvm_gpu_chunk_struct uvm_gpu_chunk_t;
|
||||
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
typedef struct uvm_rm_mem_struct uvm_rm_mem_t;
|
||||
typedef struct uvm_mem_struct uvm_mem_t;
|
||||
typedef struct uvm_host_hal_struct uvm_host_hal_t;
|
||||
@ -56,6 +58,7 @@ typedef struct uvm_va_range_struct uvm_va_range_t;
|
||||
typedef struct uvm_va_block_struct uvm_va_block_t;
|
||||
typedef struct uvm_va_block_test_struct uvm_va_block_test_t;
|
||||
typedef struct uvm_va_block_wrapper_struct uvm_va_block_wrapper_t;
|
||||
typedef struct uvm_va_block_retry_struct uvm_va_block_retry_t;
|
||||
typedef struct uvm_va_space_struct uvm_va_space_t;
|
||||
typedef struct uvm_va_space_mm_struct uvm_va_space_mm_t;
|
||||
|
||||
|
@ -191,6 +191,16 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
g_uvm_global.parent_gpus[gpu_index] = NULL;
|
||||
}
|
||||
|
||||
// Get a parent gpu by its id.
|
||||
// Returns a pointer to the parent GPU object, or NULL if not found.
|
||||
//
|
||||
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
|
||||
// retained at least one of the child GPUs.
|
||||
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_gpu_id_t id)
|
||||
{
|
||||
return g_uvm_global.parent_gpus[uvm_id_gpu_index(id)];
|
||||
}
|
||||
|
||||
// Get a gpu by its global id.
|
||||
// Returns a pointer to the GPU object, or NULL if not found.
|
||||
//
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -94,8 +94,6 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
|
||||
return UVM_GPU_LINK_NVLINK_3;
|
||||
case UVM_LINK_TYPE_NVLINK_4:
|
||||
return UVM_GPU_LINK_NVLINK_4;
|
||||
case UVM_LINK_TYPE_C2C:
|
||||
return UVM_GPU_LINK_C2C;
|
||||
default:
|
||||
return UVM_GPU_LINK_INVALID;
|
||||
}
|
||||
@ -210,27 +208,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
|
||||
return parent_gpu->rm_info.subdeviceCount == 1;
|
||||
}
|
||||
|
||||
static bool parent_gpu_uses_canonical_form_address(uvm_parent_gpu_t *parent_gpu)
|
||||
static bool platform_uses_canonical_form_address(void)
|
||||
{
|
||||
NvU64 gpu_addr_shift;
|
||||
NvU64 cpu_addr_shift;
|
||||
|
||||
// PPC64LE doesn't use canonical form addresses.
|
||||
if (NVCPU_IS_PPC64LE)
|
||||
return false;
|
||||
|
||||
// We use big_page_size as UVM_PAGE_SIZE_64K because num_va_bits() is
|
||||
// big_page_size invariant in the MMU HAL.
|
||||
UVM_ASSERT(!parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K) ||
|
||||
(parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits() ==
|
||||
parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_128K)->num_va_bits()));
|
||||
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
// Refer to the comments and diagram in uvm_gpu.c:uvm_gpu_can_address().
|
||||
return gpu_addr_shift >= cpu_addr_shift;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
@ -239,6 +222,9 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// the canonical address form.
|
||||
NvU64 max_va_lower;
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
@ -246,6 +232,10 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
addr_shift = gpu_addr_shift;
|
||||
|
||||
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
|
||||
// by applying the same upper-bit checks that x86, ARM, and Power
|
||||
// processors do. x86 and ARM use canonical form addresses. For ARM, even
|
||||
@ -255,13 +245,15 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// mapped (or addressed) by the GPU/CPU when the CPU uses canonical form.
|
||||
// (C) regions are only accessible by the CPU. Similarly, (G) regions
|
||||
// are only accessible by the GPU. (X) regions are not addressible.
|
||||
// Note that we only consider (V) regions, i.e., address ranges that are
|
||||
// addressable by both, the CPU and GPU.
|
||||
//
|
||||
// GPU MAX VA < CPU MAX VA GPU MAX VA >= CPU MAX VA
|
||||
// 0xF..F +----------------+ 0xF..F +----------------+
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| |VVVVVVVVVVVVVVVV|
|
||||
// |CCCCCCCCCCCCCCCC| CPU MIN UPPER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// GPU MIN UPPER VA|----------------| CPU MIN UPPER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// CPU MIN UPPER VA|----------------| GPU MIN UPPER VA|----------------|
|
||||
@ -270,32 +262,83 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
// CPU MAX LOWER VA|----------------| GPU MAX LOWER VA|----------------|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// |CCCCCCCCCCCCCCCC| |GGGGGGGGGGGGGGGG|
|
||||
// GPU MAX VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// GPU MAX LOWER VA|----------------| CPU MAX LOWER VA|----------------|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// |VVVVVVVVVVVVVVVV| |VVVVVVVVVVVVVVVV|
|
||||
// 0 +----------------+ 0 +----------------+
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(gpu->parent)) {
|
||||
NvU64 min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - gpu->address_space_tree.hal->num_va_bits()));
|
||||
max_va_lower = 1ULL << (gpu->address_space_tree.hal->num_va_bits() - 1);
|
||||
// On canonical form address platforms and Pascal+ GPUs.
|
||||
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
|
||||
NvU64 min_va_upper;
|
||||
|
||||
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
|
||||
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
|
||||
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
|
||||
// See more details on uvm_parent_gpu_canonical_address(..);
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
|
||||
max_va_lower = 1ULL << (addr_shift - 1);
|
||||
return (addr_end < max_va_lower) || (addr >= min_va_upper);
|
||||
}
|
||||
else {
|
||||
max_va_lower = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
max_va_lower = 1ULL << addr_shift;
|
||||
return addr_end < max_va_lower;
|
||||
}
|
||||
}
|
||||
|
||||
// The internal UVM VAS does not use canonical form addresses.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
|
||||
{
|
||||
NvU64 addr_end = addr + size - 1;
|
||||
NvU64 max_gpu_va;
|
||||
|
||||
// Watch out for calling this too early in init
|
||||
UVM_ASSERT(gpu->address_space_tree.hal);
|
||||
UVM_ASSERT(gpu->address_space_tree.hal->num_va_bits() < 64);
|
||||
UVM_ASSERT(addr <= addr_end);
|
||||
UVM_ASSERT(size > 0);
|
||||
|
||||
max_gpu_va = 1ULL << gpu->address_space_tree.hal->num_va_bits();
|
||||
return addr_end < max_gpu_va;
|
||||
}
|
||||
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
|
||||
{
|
||||
NvU32 gpu_va_bits;
|
||||
NvU32 shift;
|
||||
NvU8 gpu_addr_shift;
|
||||
NvU8 cpu_addr_shift;
|
||||
NvU8 addr_shift;
|
||||
NvU64 input_addr = addr;
|
||||
|
||||
if (parent_gpu_uses_canonical_form_address(parent_gpu)) {
|
||||
gpu_va_bits = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
shift = 64 - gpu_va_bits;
|
||||
addr = (NvU64)((NvS64)(addr << shift) >> shift);
|
||||
if (platform_uses_canonical_form_address()) {
|
||||
// When the CPU VA width is larger than GPU's, it means that:
|
||||
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
|
||||
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
|
||||
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
|
||||
// behavior of CPUs with smaller (than GPU) VA widths.
|
||||
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
|
||||
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
|
||||
|
||||
if (cpu_addr_shift > gpu_addr_shift)
|
||||
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
|
||||
else if (gpu_addr_shift == 57)
|
||||
addr_shift = gpu_addr_shift;
|
||||
else
|
||||
addr_shift = cpu_addr_shift;
|
||||
|
||||
addr = (NvU64)((NvS64)(addr << (64 - addr_shift)) >> (64 - addr_shift));
|
||||
|
||||
// This protection acts on when the address is not covered by the GPU's
|
||||
// OOR_ADDR_CHECK. This can only happen when OOR_ADDR_CHECK is in
|
||||
// permissive (NO_CHECK) mode.
|
||||
if ((addr << (64 - gpu_addr_shift)) != (input_addr << (64 - gpu_addr_shift)))
|
||||
return input_addr;
|
||||
}
|
||||
|
||||
return addr;
|
||||
@ -351,7 +394,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
|
||||
|
||||
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
|
||||
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 6);
|
||||
|
||||
switch (link_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
|
||||
@ -360,7 +403,6 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
|
||||
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
@ -866,6 +908,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_parent_gpu_t **parent_gpu_out)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
NV_STATUS status;
|
||||
|
||||
parent_gpu = uvm_kvmalloc_zero(sizeof(*parent_gpu));
|
||||
if (!parent_gpu)
|
||||
@ -882,11 +925,14 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
|
||||
uvm_rb_tree_init(&parent_gpu->tsg_table);
|
||||
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
status = errno_to_nv_status(nv_kthread_q_init(&parent_gpu->lazy_free_q, "vidmem lazy free"));
|
||||
|
||||
nv_kref_init(&parent_gpu->gpu_kref);
|
||||
|
||||
*parent_gpu_out = parent_gpu;
|
||||
|
||||
return NV_OK;
|
||||
return status;
|
||||
}
|
||||
|
||||
// Allocates a uvm_gpu_t struct and initializes the basic fields and leaves all
|
||||
@ -1539,6 +1585,8 @@ static void uvm_parent_gpu_destroy(nv_kref_t *nv_kref)
|
||||
UVM_ASSERT(parent_gpu->num_retained_gpus == 0);
|
||||
UVM_ASSERT(bitmap_empty(parent_gpu->valid_gpus, UVM_ID_MAX_SUB_PROCESSORS));
|
||||
|
||||
nv_kthread_q_stop(&parent_gpu->lazy_free_q);
|
||||
|
||||
for (sub_processor_index = 0; sub_processor_index < UVM_ID_MAX_SUB_PROCESSORS; sub_processor_index++)
|
||||
UVM_ASSERT(!parent_gpu->gpus[sub_processor_index]);
|
||||
|
||||
@ -2165,12 +2213,9 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
|
||||
|
||||
// check for peer-to-peer compatibility (PCI-E or NvLink).
|
||||
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
|
||||
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|
||||
|| peer_caps->link_type == UVM_GPU_LINK_C2C
|
||||
)
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
|
||||
@ -2553,7 +2598,10 @@ uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu
|
||||
uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu)
|
||||
{
|
||||
// See comment in page_tree_set_location
|
||||
return uvm_gpu_is_virt_mode_sriov_heavy(gpu)? UVM_APERTURE_VID : UVM_APERTURE_DEFAULT;
|
||||
if (uvm_gpu_is_virt_mode_sriov_heavy(gpu))
|
||||
return UVM_APERTURE_VID;
|
||||
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
|
||||
@ -2964,9 +3012,6 @@ NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -3005,9 +3050,6 @@ NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
|
||||
exit_unlock:
|
||||
uvm_spin_unlock(&gpu->parent->instance_ptr_table_lock);
|
||||
|
||||
if (status == NV_OK)
|
||||
UVM_ASSERT(uvm_va_space_initialized(*out_va_space) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -386,7 +386,8 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// Virtual address notifications are always aligned to 64k. This means up to 16
|
||||
// different physical locations could have been accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct {
|
||||
struct
|
||||
{
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
@ -523,7 +524,6 @@ typedef enum
|
||||
UVM_GPU_LINK_NVLINK_2,
|
||||
UVM_GPU_LINK_NVLINK_3,
|
||||
UVM_GPU_LINK_NVLINK_4,
|
||||
UVM_GPU_LINK_C2C,
|
||||
UVM_GPU_LINK_MAX
|
||||
} uvm_gpu_link_type_t;
|
||||
|
||||
@ -957,6 +957,10 @@ struct uvm_parent_gpu_struct
|
||||
// NUMA info, mainly for ATS
|
||||
uvm_numa_info_t numa_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if supports_access_counters is set to true
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
|
||||
@ -1120,7 +1124,8 @@ struct uvm_gpu_peer_struct
|
||||
// deletion.
|
||||
NvHandle p2p_handle;
|
||||
|
||||
struct {
|
||||
struct
|
||||
{
|
||||
struct proc_dir_entry *peer_file[2];
|
||||
struct proc_dir_entry *peer_symlink_file[2];
|
||||
|
||||
@ -1364,6 +1369,16 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
// Returns whether the given range is within the GPU's addressable VA ranges in
// the internal GPU VA "kernel" address space, which is a linear address space.
// Therefore, the input 'addr' must not be in canonical form, even on platforms
// that use canonical form addresses, i.e., ARM64 and x86.
// Warning: This only checks whether the GPU's MMU can support the given
// address. Some HW units on that GPU might only support a smaller range.
//
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);

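Canonical form simply sign-extends bit (addr_shift - 1) of the address into the upper bits, and the corresponding canonical "hole" is bounded by a lower maximum and an upper minimum VA. The sketch below restates that arithmetic as standalone C for illustration; the helper names are hypothetical, and the relevant widths (for example 48 bits on 4-level x86-64, 57 with 5-level paging) are assumptions rather than values taken from this diff.

    #include <stdint.h>

    // Sign-extend a virtual address whose meaningful width is addr_shift bits.
    static inline uint64_t canonical_address(uint64_t addr, unsigned addr_shift)
    {
        unsigned drop = 64 - addr_shift;
        return (uint64_t)(((int64_t)(addr << drop)) >> drop);
    }

    // The canonical hole spans [max_va_lower, min_va_upper): anything below
    // max_va_lower or at/above min_va_upper is a valid canonical address.
    static inline uint64_t canonical_max_va_lower(unsigned addr_shift)
    {
        return 1ULL << (addr_shift - 1);
    }

    static inline uint64_t canonical_min_va_upper(unsigned addr_shift)
    {
        return (uint64_t)((int64_t)(1ULL << 63) >> (64 - addr_shift));
    }

With addr_shift = 48, for example, canonical_max_va_lower() is 0x0000800000000000 and canonical_min_va_upper() is 0xFFFF800000000000, matching the usual x86-64 split.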
@ -459,8 +459,6 @@ static void kill_channel_delayed(void *_user_channel)
|
||||
uvm_user_channel_t *user_channel = (uvm_user_channel_t *)_user_channel;
|
||||
uvm_va_space_t *va_space = user_channel->kill_channel.va_space;
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
if (user_channel->gpu_va_space) {
|
||||
// RM handles the fault, which will do the correct fault reporting in the
|
||||
|
@ -1034,6 +1034,61 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool check_fault_entry_duplicate(const uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
bool is_duplicate = false;
|
||||
|
||||
if (previous_entry) {
|
||||
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
|
||||
(current_entry->fault_address == previous_entry->fault_address);
|
||||
}
|
||||
|
||||
return is_duplicate;
|
||||
}
|
||||
|
||||
static void fault_entry_duplicate_flags(uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
UVM_ASSERT(previous_entry);
|
||||
UVM_ASSERT(check_fault_entry_duplicate(current_entry, previous_entry));
|
||||
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch faults
|
||||
// on the page
|
||||
if (previous_entry->is_invalid_prefetch)
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
if (previous_entry->is_throttled)
|
||||
current_entry->is_throttled = true;
|
||||
}
|
||||
|
||||
static void update_batch_context(uvm_fault_service_batch_context_t *batch_context,
|
||||
uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry)
|
||||
{
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
|
||||
UVM_ASSERT(utlb->num_pending_faults > 0);
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
}
|
||||
|
||||
// This function computes the maximum access type that can be serviced for the
|
||||
// reported fault instances given the logical permissions of the VA range. If
|
||||
// none of the fault instances can be serviced UVM_FAULT_ACCESS_TYPE_COUNT is
|
||||
@ -1122,11 +1177,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
// - NV_ERR_MORE_PROCESSING_REQUIRED if servicing needs allocation retry
|
||||
// - NV_ERR_NO_MEMORY if the faults could not be serviced due to OOM
|
||||
// - Any other value is a UVM-global error
|
||||
static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -1200,7 +1255,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
if (i > first_fault_index) {
|
||||
previous_entry = ordered_fault_cache[i - 1];
|
||||
is_duplicate = current_entry->fault_address == previous_entry->fault_address;
|
||||
is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
}
|
||||
|
||||
if (block_context->num_retries == 0) {
|
||||
@ -1215,12 +1270,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Service the most intrusive fault per page, only. Waive the rest
|
||||
if (is_duplicate) {
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch
|
||||
// faults on the page
|
||||
current_entry->is_invalid_prefetch = previous_entry->is_invalid_prefetch;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
current_entry->is_throttled = previous_entry->is_throttled;
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// The previous fault was non-fatal so the page has been already
|
||||
// serviced
|
||||
@ -1316,25 +1366,8 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
// Only update counters the first time since logical permissions cannot
|
||||
// change while we hold the VA space lock
|
||||
// TODO: Bug 1750144: That might not be true with HMM.
|
||||
if (block_context->num_retries == 0) {
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
}
|
||||
if (block_context->num_retries == 0)
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
}
|
||||
|
||||
// Apply the changes computed in the fault service block context, if there
|
||||
@ -1361,10 +1394,10 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
//
|
||||
// See the comments for function service_fault_batch_block_locked for
|
||||
// implementation details and error codes.
|
||||
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -1378,11 +1411,11 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, &va_block_retry,
|
||||
service_batch_managed_faults_in_block_locked(gpu,
|
||||
service_fault_batch_block_locked(gpu,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
first_fault_index,
|
||||
batch_context,
|
||||
first_fault_index,
|
||||
block_faults));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&batch_context->tracker, &va_block->tracker);
|
||||
@ -1402,59 +1435,65 @@ typedef enum
|
||||
FAULT_SERVICE_MODE_CANCEL,
|
||||
} fault_service_mode_t;
|
||||
|
||||
static NV_STATUS service_non_managed_fault(uvm_fault_buffer_entry_t *current_entry,
|
||||
const uvm_fault_buffer_entry_t *previous_entry,
|
||||
NV_STATUS lookup_status,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS service_fault_batch_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate,
|
||||
uvm_fault_utlb_info_t *utlb)
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status = lookup_status;
|
||||
bool is_duplicate = false;
|
||||
UVM_ASSERT(utlb->num_pending_faults > 0);
|
||||
UVM_ASSERT(lookup_status != NV_OK);
|
||||
NV_STATUS status;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
|
||||
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
if (previous_entry) {
|
||||
is_duplicate = (current_entry->va_space == previous_entry->va_space) &&
|
||||
(current_entry->fault_address == previous_entry->fault_address);
|
||||
|
||||
if (is_duplicate) {
|
||||
// Propagate the is_invalid_prefetch flag across all prefetch faults
|
||||
// on the page
|
||||
if (previous_entry->is_invalid_prefetch)
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
|
||||
// If a page is throttled, all faults on the page must be skipped
|
||||
if (previous_entry->is_throttled)
|
||||
current_entry->is_throttled = true;
|
||||
}
|
||||
}
|
||||
if (is_duplicate)
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// Generate fault events for all fault packets
|
||||
uvm_perf_event_notify_gpu_fault(¤t_entry->va_space->perf_events,
|
||||
NULL,
|
||||
gpu_va_space->gpu->id,
|
||||
gpu->id,
|
||||
UVM_ID_INVALID,
|
||||
current_entry,
|
||||
batch_context->batch_id,
|
||||
is_duplicate);
|
||||
|
||||
if (status != NV_ERR_INVALID_ADDRESS)
|
||||
return status;
|
||||
|
||||
if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
// The VA isn't managed. See if ATS knows about it, unless it is a
|
||||
// duplicate and the previous fault was non-fatal so the page has
|
||||
// already been serviced
|
||||
//
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
if (!is_duplicate || previous_entry->is_fatal)
|
||||
status = uvm_ats_service_fault_entry(gpu_va_space, current_entry, ats_invalidate);
|
||||
else
|
||||
status = NV_OK;
|
||||
}
|
||||
else {
|
||||
// If the VA block cannot be found, set the fatal fault flag,
|
||||
|
||||
(*block_faults)++;
|
||||
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void service_fault_batch_fatal(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NV_STATUS status,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
const uvm_fault_buffer_entry_t *previous_entry = first_fault_index > 0 ?
|
||||
batch_context->ordered_fault_cache[first_fault_index - 1] : NULL;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
if (is_duplicate)
|
||||
fault_entry_duplicate_flags(current_entry, previous_entry);
|
||||
|
||||
// The VA block cannot be found, set the fatal fault flag,
|
||||
// unless it is a prefetch fault
|
||||
if (current_entry->fault_access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) {
|
||||
current_entry->is_invalid_prefetch = true;
|
||||
@ -1465,31 +1504,59 @@ static NV_STATUS service_non_managed_fault(uvm_fault_buffer_entry_t *current_ent
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
|
||||
update_batch_context(batch_context, current_entry, previous_entry);
|
||||
|
||||
uvm_perf_event_notify_gpu_fault(¤t_entry->va_space->perf_events,
|
||||
NULL,
|
||||
gpu->id,
|
||||
UVM_ID_INVALID,
|
||||
current_entry,
|
||||
batch_context->batch_id,
|
||||
is_duplicate);
|
||||
|
||||
(*block_faults)++;
|
||||
}
|
||||
|
||||
static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
NvU32 first_fault_index,
|
||||
NvU32 *block_faults)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[first_fault_index];
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
NvU64 fault_address = current_entry->fault_address;
|
||||
|
||||
(*block_faults) = 0;
|
||||
|
||||
va_range = uvm_va_range_find(va_space, fault_address);
|
||||
status = uvm_va_block_find_create_in_range(va_space, va_range, fault_address, va_block_context, &va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_fault_batch_block(gpu, va_block, batch_context, first_fault_index, block_faults);
|
||||
}
|
||||
else if ((status == NV_ERR_INVALID_ADDRESS) && uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_fault_batch_ats(gpu_va_space, mm, batch_context, first_fault_index, block_faults);
|
||||
}
|
||||
else {
|
||||
service_fault_batch_fatal(gpu_va_space->gpu, batch_context, first_fault_index, status, block_faults);
|
||||
|
||||
// Do not fail due to logical errors
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
if (is_duplicate)
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances;
|
||||
else
|
||||
batch_context->num_duplicate_faults += current_entry->num_instances - 1;
|
||||
|
||||
if (current_entry->is_invalid_prefetch)
|
||||
batch_context->num_invalid_prefetch_faults += current_entry->num_instances;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
}
|
||||
|
||||
if (current_entry->is_throttled)
|
||||
batch_context->has_throttled_faults = true;
|
||||
|
||||
return status;
|
||||
}
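
service_fault_batch_dispatch() above encodes a three-way fallback: the managed VA block path first, ATS if the address is unmanaged and ATS can service it, and otherwise a fatal marking that deliberately does not fail the batch. A stub-based sketch of that control flow, with demo_* names invented for the example rather than real UVM calls:

    #include <stdbool.h>

    typedef enum { DEMO_OK, DEMO_INVALID_ADDRESS, DEMO_GLOBAL_ERROR } demo_status_t;

    // Stubbed-out service paths so the sketch is self-contained.
    static demo_status_t demo_service_managed(void) { return DEMO_INVALID_ADDRESS; }
    static demo_status_t demo_service_ats(void)     { return DEMO_OK; }
    static void demo_mark_fatal(void)               { }

    // Try the managed path first; if the address is unmanaged, fall back to
    // ATS when available; otherwise record a fatal fault but keep the batch
    // alive, since a logical error on one fault must not abort the others.
    static demo_status_t demo_dispatch(bool ats_available)
    {
        demo_status_t status = demo_service_managed();

        if (status != DEMO_INVALID_ADDRESS)
            return status;

        if (ats_available)
            return demo_service_ats();

        demo_mark_fatal();
        return DEMO_OK;
    }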
|
||||
|
||||
// Scan the ordered view of faults and group them by different va_blocks.
|
||||
// Service faults for each va_block, in batch.
|
||||
// Scan the ordered view of faults and group them by different va_blocks
|
||||
// (managed faults) and service faults for each va_block, in batch.
|
||||
// Service non-managed faults one at a time as they are encountered during the
|
||||
// scan.
|
||||
//
|
||||
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
|
||||
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
|
||||
@ -1503,9 +1570,9 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
struct mm_struct *mm = NULL;
|
||||
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
|
||||
gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
|
||||
struct mm_struct *mm = NULL;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
&gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
|
||||
@ -1514,7 +1581,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults;) {
|
||||
uvm_va_block_t *va_block;
|
||||
NvU32 block_faults;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_fault_utlb_info_t *utlb = &batch_context->utlbs[current_entry->fault_source.utlb_id];
|
||||
@ -1548,14 +1614,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
if (gpu_va_space && gpu_va_space->needs_fault_buffer_flush) {
|
||||
// flush if required and clear the flush flag
|
||||
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
gpu_va_space->needs_fault_buffer_flush = false;
|
||||
|
||||
if (status == NV_OK)
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
@ -1586,49 +1649,22 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
status = uvm_va_block_find_create(va_space,
|
||||
current_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
|
||||
va_block,
|
||||
i,
|
||||
batch_context,
|
||||
&block_faults);
|
||||
|
||||
// When service_batch_managed_faults_in_block returns != NV_OK
|
||||
// something really bad happened
|
||||
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults);
|
||||
// TODO: Bug 3900733: clean up locking in service_fault_batch().
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
mm = NULL;
|
||||
va_space = NULL;
|
||||
continue;
|
||||
}
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
i += block_faults;
|
||||
}
|
||||
else {
|
||||
const uvm_fault_buffer_entry_t *previous_entry = i == 0? NULL : batch_context->ordered_fault_cache[i - 1];
|
||||
|
||||
status = service_non_managed_fault(current_entry,
|
||||
previous_entry,
|
||||
status,
|
||||
gpu_va_space,
|
||||
mm,
|
||||
batch_context,
|
||||
ats_invalidate,
|
||||
utlb);
|
||||
|
||||
// When service_non_managed_fault returns != NV_OK something really
|
||||
// bad happened
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't issue replays in cancel mode
|
||||
if (replay_per_va_block) {
|
||||
if (replay_per_va_block && !batch_context->has_fatal_faults) {
|
||||
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
@ -131,6 +131,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
uvm_gpu_semaphore_pool_page_t *pool_page;
|
||||
NvU32 *payloads;
|
||||
size_t i;
|
||||
uvm_rm_mem_type_t rm_mem_type = UVM_RM_MEM_TYPE_SYS;
|
||||
|
||||
uvm_assert_mutex_locked(&pool->mutex);
|
||||
|
||||
@ -142,7 +143,7 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
pool_page->pool = pool;
|
||||
|
||||
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
|
||||
UVM_RM_MEM_TYPE_SYS,
|
||||
rm_mem_type,
|
||||
UVM_SEMAPHORE_PAGE_SIZE,
|
||||
0,
|
||||
&pool_page->memory);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -44,6 +44,10 @@
|
||||
#include "clc86f.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
static int uvm_downgrade_force_membar_sys = 1;
|
||||
module_param(uvm_downgrade_force_membar_sys, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_downgrade_force_membar_sys, "Force all TLB invalidation downgrades to use MEMBAR_SYS");
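
The knob above uses the standard Linux module-parameter interface; with non-zero permission bits it becomes visible under /sys/module/<module>/parameters. A minimal standalone module showing the same pattern follows; the demo_* names are invented for this sketch and are not part of the driver.

    #include <linux/module.h>
    #include <linux/init.h>

    // Writable integer knob, readable and writable by root at runtime.
    static int demo_force_membar_sys = 1;
    module_param(demo_force_membar_sys, int, 0644);
    MODULE_PARM_DESC(demo_force_membar_sys, "Force MEMBAR_SYS on downgrades (demo knob)");

    static int __init demo_param_init(void)
    {
        pr_info("demo_force_membar_sys=%d\n", demo_force_membar_sys);
        return 0;
    }

    static void __exit demo_param_exit(void)
    {
    }

    module_init(demo_param_init);
    module_exit(demo_param_exit);
    MODULE_LICENSE("GPL");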
|
||||
|
||||
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
|
||||
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
|
||||
#define ARCH_OP_COUNT (sizeof(uvm_arch_hal_t) / sizeof(void *))
|
||||
@ -61,7 +65,7 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.id = MAXWELL_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.init = uvm_hal_maxwell_ce_init,
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.semaphore_release = uvm_hal_maxwell_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_maxwell_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_maxwell_ce_semaphore_reduction_inc,
|
||||
@ -69,11 +73,11 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.offset_in_out = uvm_hal_maxwell_ce_offset_in_out,
|
||||
.phys_mode = uvm_hal_maxwell_ce_phys_mode,
|
||||
.plc_mode = uvm_hal_maxwell_ce_plc_mode,
|
||||
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memcopy = uvm_hal_maxwell_ce_memcopy,
|
||||
.memcopy_v_to_v = uvm_hal_maxwell_ce_memcopy_v_to_v,
|
||||
.memset_validate = uvm_hal_ce_memset_validate_stub,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
.memset_1 = uvm_hal_maxwell_ce_memset_1,
|
||||
.memset_4 = uvm_hal_maxwell_ce_memset_4,
|
||||
.memset_8 = uvm_hal_maxwell_ce_memset_8,
|
||||
@ -99,7 +103,15 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
{
|
||||
.id = VOLTA_DMA_COPY_A,
|
||||
.parent_id = PASCAL_DMA_COPY_B,
|
||||
.u.ce_ops = {},
|
||||
.u.ce_ops = {
|
||||
.semaphore_release = uvm_hal_volta_ce_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_volta_ce_semaphore_timestamp,
|
||||
.semaphore_reduction_inc = uvm_hal_volta_ce_semaphore_reduction_inc,
|
||||
.memcopy = uvm_hal_volta_ce_memcopy,
|
||||
.memset_1 = uvm_hal_volta_ce_memset_1,
|
||||
.memset_4 = uvm_hal_volta_ce_memset_4,
|
||||
.memset_8 = uvm_hal_volta_ce_memset_8,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = TURING_DMA_COPY_A,
|
||||
@ -110,22 +122,22 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.id = AMPERE_DMA_COPY_A,
|
||||
.parent_id = TURING_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.method_validate = uvm_hal_ampere_ce_method_validate_c6b5,
|
||||
.method_is_valid = uvm_hal_ampere_ce_method_is_valid_c6b5,
|
||||
.phys_mode = uvm_hal_ampere_ce_phys_mode,
|
||||
.memcopy_validate = uvm_hal_ampere_ce_memcopy_validate_c6b5,
|
||||
.memcopy_is_valid = uvm_hal_ampere_ce_memcopy_is_valid_c6b5,
|
||||
.memcopy_patch_src = uvm_hal_ampere_ce_memcopy_patch_src_c6b5,
|
||||
.memset_validate = uvm_hal_ampere_ce_memset_validate_c6b5,
|
||||
.memset_is_valid = uvm_hal_ampere_ce_memset_is_valid_c6b5,
|
||||
},
|
||||
},
|
||||
{
|
||||
.id = AMPERE_DMA_COPY_B,
|
||||
.parent_id = AMPERE_DMA_COPY_A,
|
||||
.u.ce_ops = {
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5,
|
||||
.memcopy_validate = uvm_hal_ce_memcopy_validate_stub,
|
||||
.memcopy_is_valid = uvm_hal_ce_memcopy_is_valid_stub,
|
||||
.memcopy_patch_src = uvm_hal_ce_memcopy_patch_src_stub,
|
||||
.memset_validate = uvm_hal_ce_memset_validate_stub,
|
||||
.memset_is_valid = uvm_hal_ce_memset_is_valid_stub,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -140,6 +152,8 @@ static uvm_hal_class_ops_t ce_table[] =
|
||||
.memset_1 = uvm_hal_hopper_ce_memset_1,
|
||||
.memset_4 = uvm_hal_hopper_ce_memset_4,
|
||||
.memset_8 = uvm_hal_hopper_ce_memset_8,
|
||||
.memcopy_is_valid = uvm_hal_hopper_ce_memcopy_is_valid,
|
||||
.memset_is_valid = uvm_hal_hopper_ce_memset_is_valid,
|
||||
},
|
||||
},
|
||||
};
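
Entries in ce_table (and in host_table below) only list the operations they override; the .parent_id field suggests that the remaining pointers are inherited from the parent class entry when the tables are processed. A minimal sketch of that inheritance idea, using invented demo_* types rather than the real uvm_hal_class_ops_t:

    #include <stddef.h>

    typedef void (*demo_op_t)(void);

    typedef struct
    {
        int id;
        int parent_id;        // 0 means "no parent"
        demo_op_t ops[4];     // NULL slots are inherited from the parent
    } demo_class_ops_t;

    // Copy every op the child leaves NULL from its parent, so a child class
    // only has to spell out the operations it actually overrides.
    static void demo_inherit_ops(demo_class_ops_t *child, const demo_class_ops_t *parent)
    {
        size_t i;

        for (i = 0; i < sizeof(child->ops) / sizeof(child->ops[0]); i++) {
            if (child->ops[i] == NULL)
                child->ops[i] = parent->ops[i];
        }
    }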
|
||||
@ -152,8 +166,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.id = KEPLER_CHANNEL_GPFIFO_B,
|
||||
.u.host_ops = {
|
||||
.init = uvm_hal_maxwell_host_init_noop,
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.sw_method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.wait_for_idle = uvm_hal_maxwell_host_wait_for_idle,
|
||||
.membar_sys = uvm_hal_maxwell_host_membar_sys,
|
||||
// No MEMBAR GPU until Pascal, just do a MEMBAR SYS.
|
||||
@ -235,8 +249,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.id = AMPERE_CHANNEL_GPFIFO_A,
|
||||
.parent_id = TURING_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.method_validate = uvm_hal_ampere_host_method_validate,
|
||||
.sw_method_validate = uvm_hal_ampere_host_sw_method_validate,
|
||||
.method_is_valid = uvm_hal_ampere_host_method_is_valid,
|
||||
.sw_method_is_valid = uvm_hal_ampere_host_sw_method_is_valid,
|
||||
.clear_faulted_channel_sw_method = uvm_hal_ampere_host_clear_faulted_channel_sw_method,
|
||||
.clear_faulted_channel_register = uvm_hal_ampere_host_clear_faulted_channel_register,
|
||||
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
|
||||
@ -248,8 +262,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.id = HOPPER_CHANNEL_GPFIFO_A,
|
||||
.parent_id = AMPERE_CHANNEL_GPFIFO_A,
|
||||
.u.host_ops = {
|
||||
.method_validate = uvm_hal_method_validate_stub,
|
||||
.sw_method_validate = uvm_hal_method_validate_stub,
|
||||
.method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.sw_method_is_valid = uvm_hal_method_is_valid_stub,
|
||||
.semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
|
||||
.semaphore_release = uvm_hal_hopper_host_semaphore_release,
|
||||
.semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
|
||||
@ -637,14 +651,20 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
// Access counters are currently not supported in vGPU.
|
||||
//
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
}
|
||||
|
||||
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
parent_gpu->arch_hal->init_properties(parent_gpu);
|
||||
|
||||
// Override the HAL when in non-passthrough virtualization
|
||||
// TODO: Bug 200692962: [UVM] Add support for access counters in UVM on SR-IOV configurations
|
||||
if (parent_gpu->virt_mode != UVM_VIRT_MODE_NONE)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
hal_override_properties(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
@ -663,6 +683,44 @@ void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
uvm_hal_membar(gpu, push, membar);
|
||||
}
|
||||
|
||||
bool uvm_hal_membar_before_semaphore(uvm_push_t *push)
{
    uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);

    if (membar == UVM_MEMBAR_NONE) {
        // No MEMBAR requested, don't use a flush.
        return false;
    }

    if (membar == UVM_MEMBAR_GPU) {
        // MEMBAR GPU requested, do it on the HOST and skip the engine flush as
        // it doesn't have this capability.
        uvm_hal_wfi_membar(push, UVM_MEMBAR_GPU);
        return false;
    }

    // By default do a MEMBAR SYS and for that we can just use flush on the
    // semaphore operation.
    return true;
}

uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
{
    // If the mapped memory was local, and we're not using a coherence protocol,
    // we only need a GPU-local membar. This is because all accesses to this
    // memory, including those from other processors like the CPU or peer GPUs,
    // must come through this GPU's L2. In all current architectures, MEMBAR_GPU
    // is sufficient to resolve ordering at the L2 level.
    if (is_local_vidmem && !gpu->parent->numa_info.enabled && !uvm_downgrade_force_membar_sys)
        return UVM_MEMBAR_GPU;

    // If the mapped memory was remote, or if a coherence protocol can cache
    // this GPU's memory, then there are external ways for other processors to
    // access the memory without always going the local GPU L2, so we must use a
    // MEMBAR_SYS.
    return UVM_MEMBAR_SYS;
}
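
Both helpers above reduce to a small, pure decision: which membar a permissions downgrade needs, and whether that membar can ride on the engine's semaphore flush. A self-contained restatement follows; the demo_* names and plain-boolean inputs are simplifications for this sketch (the real functions consult gpu->parent state and the push flags).

    #include <stdbool.h>

    typedef enum { DEMO_MEMBAR_NONE, DEMO_MEMBAR_GPU, DEMO_MEMBAR_SYS } demo_membar_t;

    // Same decision as uvm_hal_downgrade_membar_type(): local vidmem with no
    // coherent path into it only needs a GPU-local membar; everything else
    // needs a system-wide one.
    static demo_membar_t demo_downgrade_membar(bool is_local_vidmem,
                                               bool coherent_with_cpu,
                                               bool force_membar_sys)
    {
        if (is_local_vidmem && !coherent_with_cpu && !force_membar_sys)
            return DEMO_MEMBAR_GPU;

        return DEMO_MEMBAR_SYS;
    }

    // Same decision as uvm_hal_membar_before_semaphore(): only MEMBAR_SYS is
    // expressed through the engine's semaphore flush; MEMBAR_GPU is handled
    // separately (Host WFI + membar.gpu) and MEMBAR_NONE needs nothing.
    static bool demo_semaphore_needs_flush(demo_membar_t membar)
    {
        return membar == DEMO_MEMBAR_SYS;
    }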
|
||||
|
||||
const char *uvm_aperture_string(uvm_aperture_t aperture)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_APERTURE_MAX != 12);
|
||||
@ -823,12 +881,12 @@ void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_e
|
||||
UVM_DBG_PRINT(" tag %x\n", entry->tag);
|
||||
}
|
||||
|
||||
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@ -837,7 +895,7 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
|
||||
{
|
||||
}
|
||||
|
||||
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
// A dummy method validation that always returns true; it can be used to skip
|
||||
// CE/Host/SW method validations for a given architecture
|
||||
bool uvm_hal_method_validate_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
typedef void (*uvm_hal_init_t)(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
|
||||
@ -42,12 +42,12 @@ void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
|
||||
void uvm_hal_pascal_host_init(uvm_push_t *push);
|
||||
|
||||
// Host method validation
|
||||
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// SW method validation
|
||||
typedef bool (*uvm_hal_host_sw_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_sw_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_host_sw_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// Wait for idle
|
||||
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
|
||||
@ -208,6 +208,7 @@ typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU3
|
||||
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
@ -220,6 +221,7 @@ void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
|
||||
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_volta_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
|
||||
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
|
||||
@ -272,16 +274,17 @@ NvU32 uvm_hal_maxwell_ce_plc_mode(void);
|
||||
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);
|
||||
|
||||
// CE method validation
|
||||
typedef bool (*uvm_hal_ce_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// Memcopy validation.
|
||||
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
|
||||
// execution. Use uvm_hal_ce_memcopy_validate_stub to skip the validation for
|
||||
// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
typedef bool (*uvm_hal_ce_memcopy_validate)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ce_memcopy_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
|
||||
|
||||
// Patching of the memcopy source; if not needed for a given architecture use
|
||||
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
|
||||
@ -296,6 +299,7 @@ void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_
|
||||
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
|
||||
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
void uvm_hal_volta_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
|
||||
|
||||
// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
|
||||
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
|
||||
@ -303,11 +307,12 @@ void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, s
|
||||
|
||||
// Memset validation.
|
||||
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
|
||||
// execution. Use uvm_hal_ce_memset_validate_stub to skip the validation for
|
||||
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
|
||||
// a given architecture.
|
||||
typedef bool (*uvm_hal_ce_memset_validate)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ce_memset_validate_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size);
|
||||
|
||||
// Memset size bytes at dst to a given N-byte input value.
|
||||
//
|
||||
@ -329,6 +334,10 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
|
||||
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
|
||||
|
||||
void uvm_hal_volta_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
|
||||
void uvm_hal_volta_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_volta_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
|
||||
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
|
||||
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
|
||||
@ -342,6 +351,7 @@ void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 v
|
||||
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_volta_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
|
||||
|
||||
// Initialize GPU architecture dependent properties
|
||||
@ -579,8 +589,8 @@ void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_
|
||||
struct uvm_host_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
uvm_hal_host_method_validate method_validate;
|
||||
uvm_hal_host_sw_method_validate sw_method_validate;
|
||||
uvm_hal_host_method_is_valid method_is_valid;
|
||||
uvm_hal_host_sw_method_is_valid sw_method_is_valid;
|
||||
uvm_hal_wait_for_idle_t wait_for_idle;
|
||||
uvm_hal_membar_sys_t membar_sys;
|
||||
uvm_hal_membar_gpu_t membar_gpu;
|
||||
@ -612,18 +622,18 @@ struct uvm_host_hal_struct
|
||||
struct uvm_ce_hal_struct
|
||||
{
|
||||
uvm_hal_init_t init;
|
||||
uvm_hal_ce_method_validate method_validate;
|
||||
uvm_hal_ce_method_is_valid method_is_valid;
|
||||
uvm_hal_semaphore_release_t semaphore_release;
|
||||
uvm_hal_semaphore_timestamp_t semaphore_timestamp;
|
||||
uvm_hal_ce_offset_out_t offset_out;
|
||||
uvm_hal_ce_offset_in_out_t offset_in_out;
|
||||
uvm_hal_ce_phys_mode_t phys_mode;
|
||||
uvm_hal_ce_plc_mode_t plc_mode;
|
||||
uvm_hal_ce_memcopy_validate memcopy_validate;
|
||||
uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
|
||||
uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
|
||||
uvm_hal_memcopy_t memcopy;
|
||||
uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
|
||||
uvm_hal_ce_memset_validate memset_validate;
|
||||
uvm_hal_ce_memset_is_valid memset_is_valid;
|
||||
uvm_hal_memset_1_t memset_1;
|
||||
uvm_hal_memset_4_t memset_4;
|
||||
uvm_hal_memset_8_t memset_8;
|
||||
@ -726,4 +736,20 @@ static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
|
||||
// appropriate Host membar(s) after a TLB invalidate.
|
||||
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);
|
||||
|
||||
// Internal helper used by architectures/engines that don't support a FLUSH
|
||||
// operation with a FLUSH_TYPE on the semaphore release method, e.g., pre-Volta
|
||||
// CE. It inspects and clears the MEMBAR push flags, issues a Host WFI +
|
||||
// membar.gpu for MEMBAR_GPU or returns true to indicate the caller to use the
|
||||
// engine's FLUSH for MEMBAR_SYS.
|
||||
bool uvm_hal_membar_before_semaphore(uvm_push_t *push);
|
||||
|
||||
// Determine the appropriate membar to use on TLB invalidates for GPU PTE
|
||||
// permissions downgrades.
|
||||
//
|
||||
// gpu is the GPU on which the TLB invalidate is happening.
|
||||
//
|
||||
// is_local_vidmem indicates whether all mappings being invalidated pointed to
|
||||
// the local GPU's memory.
|
||||
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);
|
||||
|
||||
#endif // __UVM_HAL_H__
|
||||
|
File diff suppressed because it is too large
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -37,19 +37,10 @@ typedef struct
|
||||
// This stores pointers to uvm_va_block_t for HMM blocks.
|
||||
uvm_range_tree_t blocks;
|
||||
uvm_mutex_t blocks_lock;
|
||||
|
||||
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
|
||||
// This flag is set true by default for each va_space so most processes
|
||||
// don't see partially implemented UVM-HMM behavior but can be enabled by
|
||||
// test code for a given va_space so the test process can do some interim
|
||||
// testing. It needs to be a separate flag instead of modifying
|
||||
// uvm_disable_hmm or va_space->flags since those are user inputs and are
|
||||
// visible/checked by test code.
|
||||
// Remove this when UVM-HMM is fully integrated into chips_a.
|
||||
bool disable;
|
||||
} uvm_hmm_va_space_t;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
// Tells whether HMM is enabled for the given va_space.
|
||||
// If it is not enabled, all of the functions below are no-ops.
|
||||
bool uvm_hmm_is_enabled(uvm_va_space_t *va_space);
|
||||
@ -62,17 +53,25 @@ typedef struct
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize(uvm_va_space_t *va_space);
|
||||
|
||||
// Initialize HMM for the given the va_space for testing.
|
||||
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
|
||||
// the UVM driver. Remove this when enough HMM functionality is implemented.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given the va_space.
|
||||
// Locking: va_space lock must be held in write mode.
|
||||
void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space);
|
||||
|
||||
// Unmap all page tables in this VA space which map memory owned by this
|
||||
// GPU. Any memory still resident on this GPU will be evicted to system
|
||||
// memory. Note that 'mm' can be NULL (e.g., when closing the UVM file)
|
||||
// in which case any GPU memory is simply freed.
|
||||
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
|
||||
// least read mode and the va_space lock must be held in write mode.
|
||||
void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm);
|
||||
|
||||
// Destroy the VA space's mappings on the GPU, if it has any.
|
||||
// Locking: if mm is not NULL, the caller must hold mm->mmap_lock in at
|
||||
// least read mode and the va_space lock must be held in write mode.
|
||||
void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm);
|
||||
|
||||
// Find an existing HMM va_block.
|
||||
// This function can be called without having retained and locked the mm,
|
||||
// but in that case, the only allowed operations on the returned block are
|
||||
@ -85,6 +84,25 @@ typedef struct
|
||||
NvU64 addr,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
    // Find an existing HMM va_block when processing a CPU fault and try to
    // isolate and lock the faulting page.
    // Return NV_ERR_INVALID_ADDRESS if the block is not found,
    // NV_ERR_BUSY_RETRY if the page could not be locked, and
    // NV_OK if the block is found and the page is locked. Also,
    // uvm_hmm_cpu_fault_finish() must be called if NV_OK is returned.
    // Locking: This must be called with the vma->vm_mm locked and the va_space
    // read locked.
    NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
                                        uvm_service_block_context_t *service_context,
                                        struct vm_fault *vmf,
                                        uvm_va_block_t **va_block_ptr);

    // This must be called after uvm_va_block_cpu_fault() if
    // uvm_hmm_va_block_cpu_find() returns NV_OK.
    // Locking: This must be called with the vma->vm_mm locked and the va_space
    // read locked.
    void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context);
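
The find/finish pair above implies a small calling protocol: BUSY_RETRY means retry the CPU fault, INVALID_ADDRESS means the address is not an HMM block, and an NV_OK lookup must always be paired with the finish call after servicing. A stub-based sketch of that protocol, with hmm_demo_* names invented for the example:

    typedef enum { HMM_DEMO_OK, HMM_DEMO_INVALID_ADDRESS, HMM_DEMO_BUSY_RETRY } hmm_demo_status_t;

    // Stand-ins for the find/service/finish trio described above.
    static hmm_demo_status_t hmm_demo_cpu_find(void)  { return HMM_DEMO_OK; }
    static hmm_demo_status_t hmm_demo_cpu_fault(void) { return HMM_DEMO_OK; }
    static void hmm_demo_cpu_fault_finish(void)       { }

    // BUSY_RETRY propagates so the caller retries the fault; INVALID_ADDRESS
    // propagates so the caller tries a non-HMM path; on a successful lookup
    // the finish call follows the servicing unconditionally.
    static hmm_demo_status_t hmm_demo_handle_cpu_fault(void)
    {
        hmm_demo_status_t status = hmm_demo_cpu_find();

        if (status != HMM_DEMO_OK)
            return status;

        status = hmm_demo_cpu_fault();
        hmm_demo_cpu_fault_finish();

        return status;
    }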
|
||||
|
||||
// Find or create a new HMM va_block.
|
||||
//
|
||||
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
|
||||
@ -114,7 +132,7 @@ typedef struct
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
@ -168,7 +186,8 @@ typedef struct
|
||||
NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
NvU64 base,
|
||||
NvU64 last_address);
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Set the accessed by policy for the given range. This also tries to
|
||||
// map the range. Note that 'last_address' is inclusive.
|
||||
@ -178,7 +197,17 @@ typedef struct
|
||||
uvm_processor_id_t processor_id,
|
||||
bool set_bit,
|
||||
NvU64 base,
|
||||
NvU64 last_address);
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// Deferred work item to reestablish accessed by mappings after eviction. On
|
||||
// GPUs with access counters enabled, the evicted GPU will also get remote
|
||||
// mappings.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked
|
||||
// and the va_space lock must be held in at least read mode.
|
||||
void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *block_context);
|
||||
|
||||
// Set the read duplication policy for the given range.
|
||||
// Note that 'last_address' is inclusive.
|
||||
@ -248,7 +277,104 @@ typedef struct
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr);
|
||||
|
||||
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp);
|
||||
// This is called to service a GPU fault.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked,
|
||||
// the va_space read lock must be held, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_service_block_context_t *service_context);
|
||||
|
||||
// This is called to migrate a region within a HMM va_block.
|
||||
// va_block_context must not be NULL and va_block_context->policy and
|
||||
// va_block_context->hmm.vma must be valid.
|
||||
// Locking: the va_block_context->mm must be retained, mmap_lock must be
|
||||
// locked, and the va_block lock held.
|
||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause);
|
||||
|
||||
// This is called to migrate an address range of HMM allocations via
|
||||
// UvmMigrate().
|
||||
//
|
||||
// va_block_context must not be NULL. The caller is not required to set
|
||||
// va_block_context->policy or va_block_context->hmm.vma.
|
||||
//
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||
// the va_space read lock must be held.
|
||||
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker);
|
||||
|
||||
// This sets the va_block_context->hmm.src_pfns[] to the ZONE_DEVICE private
|
||||
// PFN for the GPU chunk memory.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
uvm_va_block_region_t chunk_region);
|
||||
|
||||
// Migrate pages to system memory for the given page mask.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// This must be called after uvm_hmm_va_block_evict_chunk_prep() has
|
||||
// initialized va_block_context->hmm.src_pfns[] for the source GPU physical
|
||||
// PFNs being migrated. Note that the input mask 'pages_to_evict' can be
|
||||
// modified. If any of the evicted pages has the accessed by policy set,
|
||||
// then record that by setting out_accessed_by_set.
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set);
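
Eviction here is split into two phases: uvm_hmm_va_block_evict_chunk_prep() records the source PFNs for each GPU chunk, and uvm_hmm_va_block_evict_chunks() later migrates the accumulated set to system memory. A simplified sketch of that accumulate-then-migrate shape, using invented demo_* types rather than the real UVM state:

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct
    {
        unsigned long src_pfns[512];
        size_t count;
    } demo_evict_state_t;

    // Phase 1: record one source PFN per chunk being evicted.
    static void demo_evict_chunk_prep(demo_evict_state_t *state, unsigned long pfn)
    {
        if (state->count < sizeof(state->src_pfns) / sizeof(state->src_pfns[0]))
            state->src_pfns[state->count++] = pfn;
    }

    // Phase 2: perform one migration over the accumulated set. The sketch only
    // reports whether there is work; the real call migrates to system memory.
    static bool demo_evict_chunks(const demo_evict_state_t *state)
    {
        return state->count > 0;
    }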
|
||||
|
||||
// Migrate pages from the given GPU to system memory for the given page
|
||||
// mask and region. va_block_context must not be NULL.
|
||||
// Note that the mmap lock is not held and there is no MM retained.
|
||||
// Locking: the va_block lock must be locked.
|
||||
NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Migrate a GPU chunk to system memory. This called to remove CPU page
|
||||
// table references to device private struct pages for the given GPU after
|
||||
// all other references in va_blocks have been released and the GPU is
|
||||
// in the process of being removed/torn down. Note that there is no mm,
|
||||
// VMA, va_block or any user channel activity on this GPU.
|
||||
NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk);
|
||||
|
||||
// This returns what would be the intersection of va_block start/end and
|
||||
// VMA start/end-1 for the given 'lookup_address' if
|
||||
// uvm_hmm_va_block_find_create() was called.
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode and
|
||||
// the va_space lock must be held in at least read mode.
|
||||
NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
NvU64 *startp,
|
||||
NvU64 *endp,
|
||||
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params);
|
||||
|
||||
// This updates the HMM va_block CPU residency information for a single
|
||||
// page at 'lookup_address' by calling hmm_range_fault(). If 'populate' is
|
||||
// true, the CPU page will be faulted in read/write or read-only
|
||||
// (depending on the permission of the underlying VMA at lookup_address).
|
||||
// Locking: the caller must hold mm->mmap_lock in at least read mode and
|
||||
// the va_space lock must be held in at least read mode.
|
||||
NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
bool populate);
|
||||
|
||||
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp);
|
||||
@ -285,12 +411,17 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
static void uvm_hmm_unregister_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu, struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
static void uvm_hmm_remove_gpu_va_space(uvm_va_space_t *va_space,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
}
|
||||
|
||||
@ -301,6 +432,18 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_cpu_find(uvm_va_space_t *va_space,
|
||||
uvm_service_block_context_t *service_context,
|
||||
struct vm_fault *vmf,
|
||||
uvm_va_block_t **va_block_ptr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_cpu_fault_finish(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
@ -314,7 +457,7 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
static bool uvm_hmm_check_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
@ -349,7 +492,8 @@ typedef struct
|
||||
static NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,
|
||||
uvm_processor_id_t preferred_location,
|
||||
NvU64 base,
|
||||
NvU64 last_address)
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
@ -358,11 +502,18 @@ typedef struct
|
||||
uvm_processor_id_t processor_id,
|
||||
bool set_bit,
|
||||
NvU64 base,
|
||||
NvU64 last_address)
|
||||
NvU64 last_address,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static void uvm_hmm_block_add_eviction_mappings(uvm_va_space_t *va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *block_context)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
|
||||
uvm_read_duplication_policy_t new_policy,
|
||||
NvU64 base,
|
||||
@ -405,9 +556,84 @@ typedef struct
|
||||
return UVM_PROT_NONE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
|
||||
static NV_STATUS uvm_hmm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_make_resident_cause_t cause)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 base,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_migrate_mode_t mode,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_gpu_chunk_t *gpu_chunk,
|
||||
uvm_va_block_region_t chunk_region)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region,
|
||||
bool *out_accessed_by_set)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
const uvm_page_mask_t *pages_to_evict,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_pmm_gpu_evict_chunk(uvm_gpu_t *gpu,
|
||||
uvm_gpu_chunk_t *gpu_chunk)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_range_bounds(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
NvU64 *startp,
|
||||
NvU64 *endp,
|
||||
UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_update_residency_info(uvm_va_block_t *va_block,
|
||||
struct mm_struct *mm,
|
||||
NvU64 lookup_address,
|
||||
bool populate)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
|
@ -1,90 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_test.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_hmm.h"
|
||||
|
||||
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
struct mm_struct *mm;
|
||||
uvm_va_block_t *hmm_block = NULL;
|
||||
NV_STATUS status;
|
||||
|
||||
mm = uvm_va_space_mm_or_current_retain(va_space);
|
||||
if (!mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
uvm_down_write_mmap_lock(mm);
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
// TODO: Bug 3351822: [UVM-HMM] Remove temporary testing changes.
|
||||
// By default, HMM is enabled system wide but disabled per va_space.
|
||||
// This will initialize the va_space for HMM.
|
||||
status = uvm_hmm_va_space_initialize_test(va_space);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
|
||||
uvm_down_read_mmap_lock(mm);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Try to create an HMM va_block to virtual address zero (NULL).
|
||||
// It should fail. There should be no VMA but a va_block for range
|
||||
// [0x0 0x1fffff] is possible.
|
||||
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
|
||||
// Try to create an HMM va_block which overlaps a managed block.
|
||||
// It should fail.
|
||||
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
|
||||
// Try to create an HMM va_block; it should succeed.
|
||||
status = uvm_hmm_va_block_find_create(va_space, params->hmm_address, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
// Try to find an existing HMM va_block; it should succeed.
|
||||
status = uvm_hmm_va_block_find(va_space, params->hmm_address, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_up_read_mmap_lock(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
}
|
@ -54,6 +54,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
|
||||
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
|
||||
|
||||
|
||||
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
|
||||
|
||||
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020 NVIDIA Corporation
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -23,25 +23,9 @@
|
||||
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_mem.h"
|
||||
#include "clc8b5.h"
|
||||
|
||||
static void hopper_membar_after_transfer(uvm_push_t *push)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
return;
|
||||
|
||||
// TODO: [UVM-Volta] Remove Host WFI + Membar WAR for CE flush-only bug
|
||||
// http://nvbugs/1734761
|
||||
gpu->parent->host_hal->wait_for_idle(push);
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
else
|
||||
gpu->parent->host_hal->membar_sys(push);
|
||||
}
|
||||
|
||||
static NvU32 ce_aperture(uvm_aperture_t aperture)
|
||||
{
|
||||
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
|
||||
@ -78,45 +62,32 @@ void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
|
||||
OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
|
||||
}
|
||||
|
||||
// Perform an appropriate membar before a semaphore operation. Returns whether
|
||||
// the semaphore operation should include a flush.
|
||||
static bool hopper_membar_before_semaphore(uvm_push_t *push)
|
||||
// Return the flush type and the flush enablement.
|
||||
static NvU32 hopper_get_flush_value(uvm_push_t *push)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 flush_value;
|
||||
uvm_membar_t membar = uvm_push_get_and_reset_membar_flag(push);
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
|
||||
if (membar == UVM_MEMBAR_NONE) {
|
||||
// No MEMBAR requested, don't use a flush.
|
||||
return false;
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
}
|
||||
else {
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
|
||||
if (membar == UVM_MEMBAR_GPU)
|
||||
flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, GL);
|
||||
else
|
||||
flush_value |= HWCONST(C8B5, LAUNCH_DMA, FLUSH_TYPE, SYS);
|
||||
}
|
||||
|
||||
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
|
||||
// By default do a MEMBAR SYS and for that we can just use flush on the
|
||||
// semaphore operation.
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: Bug 1734761: Remove the HOST WFI+membar WAR, i.e, perform the CE
|
||||
// flush when MEMBAR GPU is requested.
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
gpu->parent->host_hal->wait_for_idle(push);
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
|
||||
return false;
|
||||
return flush_value;
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
@ -124,7 +95,7 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
launch_dma_plc_mode);
|
||||
@ -133,16 +104,7 @@ void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
@ -150,7 +112,7 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
|
||||
@ -162,16 +124,7 @@ void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 flush_value;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = hopper_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
else
|
||||
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
|
||||
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
|
||||
@ -180,7 +133,7 @@ void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
|
||||
NV_PUSH_1U(C8B5, LAUNCH_DMA, hopper_get_flush_value(push) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
|
||||
launch_dma_plc_mode);
|
||||
@ -218,8 +171,9 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
NvU32 launch_dma_plc_mode;
|
||||
NvU32 launch_dma_remap_enable;
|
||||
NvU32 launch_dma_scrub_enable;
|
||||
NvU32 flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
|
||||
"Memset validation failed in channel %s, GPU %s",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
@ -252,6 +206,10 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
do {
|
||||
NvU32 memset_this_time = (NvU32)min(num_elements, max_single_memset);
|
||||
|
||||
// In the last operation, a flush/membar may be issued after the memset.
|
||||
if (num_elements == memset_this_time)
|
||||
flush_value = hopper_get_flush_value(push);
|
||||
|
||||
gpu->parent->ce_hal->offset_out(push, dst.address);
|
||||
|
||||
NV_PUSH_1U(C8B5, LINE_LENGTH_IN, memset_this_time);
|
||||
@ -260,7 +218,7 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
|
||||
HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
|
||||
flush_value |
|
||||
launch_dma_remap_enable |
|
||||
launch_dma_scrub_enable |
|
||||
launch_dma_dst_type |
|
||||
@ -269,10 +227,8 @@ static void hopper_memset_common(uvm_push_t *push,
|
||||
|
||||
dst.address += memset_this_time * memset_element_size;
|
||||
num_elements -= memset_this_time;
|
||||
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
|
||||
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
} while (num_elements > 0);
|
||||
|
||||
hopper_membar_after_transfer(push);
|
||||
}
|
||||
|
||||
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
|
||||
@ -337,3 +293,16 @@ void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 v
|
||||
|
||||
hopper_memset_common(push, dst, size, 4);
|
||||
}
|
||||
|
||||
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
|
||||
{
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||||
{
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -108,7 +108,7 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif
}

#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_DEVICE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1
#else
#define UVM_IS_CONFIG_HMM() 0
@@ -404,6 +404,7 @@ static inline NvU64 NV_GETTIME(void)
// 654672d4ba1a6001c365833be895f9477c4d5eab ("locking/atomics:
// Add _{acquire|release|relaxed}() variants of some atomic operations") in v4.3
// (2015-08-06).
// TODO: Bug 3849079: We always use this definition on newer kernels.
#ifndef atomic_read_acquire
#define atomic_read_acquire(p) smp_load_acquire(&(p)->counter)
#endif
@@ -412,6 +413,24 @@ static inline NvU64 NV_GETTIME(void)
#define atomic_set_release(p, v) smp_store_release(&(p)->counter, v)
#endif

// atomic_long_read_acquire and atomic_long_set_release were added in commit
// b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb ("locking/atomics: Switch to
// generated atomic-long") in v5.1 (2019-05-05).
// TODO: Bug 3849079: We always use these definitions on newer kernels.
#define atomic_long_read_acquire uvm_atomic_long_read_acquire
static inline long uvm_atomic_long_read_acquire(atomic_long_t *p)
{
    long val = atomic_long_read(p);
    smp_mb();
    return val;
}

#define atomic_long_set_release uvm_atomic_long_set_release
static inline void uvm_atomic_long_set_release(atomic_long_t *p, long v)
{
    smp_mb();
    atomic_long_set(p, v);
}

// Added in 3.11
#ifndef PAGE_ALIGNED
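The two fallbacks added above pair a full barrier with a plain atomic_long access so that, on kernels predating the generated atomic-long helpers, the macros still provide acquire/release ordering. A minimal usage sketch of how such a pair is typically relied upon (hypothetical producer/consumer, not taken from the driver):

// Hypothetical example: a producer publishes a value and a consumer observes
// it with matching acquire semantics. Assumes the fallback definitions above
// (or the native kernel helpers) are in scope.
#include <linux/atomic.h>

static atomic_long_t published_value = ATOMIC_LONG_INIT(0);

static void producer_publish(long payload)
{
    // All stores issued before this call are visible to a reader that
    // observes the new value via atomic_long_read_acquire().
    atomic_long_set_release(&published_value, payload);
}

static long consumer_poll(void)
{
    // Returns 0 until the producer publishes; a non-zero result carries the
    // release/acquire ordering guarantee described above.
    return atomic_long_read_acquire(&published_value);
}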
@ -75,26 +75,29 @@
|
||||
//
|
||||
// Protects:
|
||||
// - gpu->parent->isr.replayable_faults.service_lock:
|
||||
// Changes to the state of a GPU as it transitions from top-half to bottom-half
|
||||
// interrupt handler for replayable faults. This lock is acquired for that GPU,
|
||||
// in the ISR top-half. Then a bottom-half is scheduled (to run in a workqueue).
|
||||
// Then the bottom-half releases the lock when that GPU's processing appears to
|
||||
// be done.
|
||||
// Changes to the state of a GPU as it transitions from top-half to
|
||||
// bottom-half interrupt handler for replayable faults. This lock is
|
||||
// acquired for that GPU, in the ISR top-half. Then a bottom-half is
|
||||
// scheduled (to run in a workqueue). Then the bottom-half releases the
|
||||
// lock when that GPU's processing appears to be done.
|
||||
//
|
||||
// - gpu->parent->isr.non_replayable_faults.service_lock:
|
||||
// Changes to the state of a GPU in the bottom-half for non-replayable faults.
|
||||
// Non-replayable faults are handed-off from RM instead of directly from the GPU
|
||||
// hardware. This means that we do not keep receiving interrupts after RM pops
|
||||
// out the faults from the HW buffer. In order not to miss fault notifications,
|
||||
// we will always schedule a bottom-half for non-replayable faults if there are
|
||||
// faults ready to be consumed in the buffer, even if there already is some
|
||||
// bottom-half running or scheduled. This lock serializes all scheduled bottom
|
||||
// halves per GPU which service non-replayable faults.
|
||||
// Changes to the state of a GPU in the bottom-half for non-replayable
|
||||
// faults. Non-replayable faults are handed-off from RM instead of
|
||||
// directly from the GPU hardware. This means that we do not keep
|
||||
// receiving interrupts after RM pops out the faults from the HW buffer.
|
||||
// In order not to miss fault notifications, we will always schedule a
|
||||
// bottom-half for non-replayable faults if there are faults ready to be
|
||||
// consumed in the buffer, even if there already is some bottom-half
|
||||
// running or scheduled. This lock serializes all scheduled bottom halves
|
||||
// per GPU which service non-replayable faults.
|
||||
//
|
||||
// - gpu->parent->isr.access_counters.service_lock:
|
||||
// Changes to the state of a GPU as it transitions from top-half to bottom-half
|
||||
// interrupt handler for access counter notifications. This lock is acquired for
|
||||
// that GPU, in the ISR top-half. Then a bottom-half is scheduled (to run in a
|
||||
// workqueue). Then the bottom-half releases the lock when that GPU's processing
|
||||
// appears to be done.
|
||||
// Changes to the state of a GPU as it transitions from top-half to
|
||||
// bottom-half interrupt handler for access counter notifications. This
|
||||
// lock is acquired for that GPU, in the ISR top-half. Then a bottom-half
|
||||
// is scheduled (to run in a workqueue). Then the bottom-half releases
|
||||
// the lock when that GPU's processing appears to be done.
|
||||
//
|
||||
// - mmap_lock (mmap_sem in kernels < 5.8)
|
||||
// Order: UVM_LOCK_ORDER_MMAP_LOCK
|
||||
@ -339,7 +342,9 @@
|
||||
// Order: UVM_LOCK_ORDER_CHANNEL
|
||||
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
|
||||
//
|
||||
// Lock protecting the state of all the channels in a channel pool.
|
||||
// Lock protecting the state of all the channels in a channel pool. The
|
||||
// channel pool lock documentation contains the guidelines about which lock
|
||||
// type (mutex or spinlock) to use.
|
||||
//
|
||||
// - Tools global VA space list lock (g_tools_va_space_list_lock)
|
||||
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
|
||||
|
@@ -106,6 +106,9 @@ static NV_STATUS uvm_pte_buffer_init(uvm_va_range_t *va_range,
    pte_buffer->mapping_info.formatType = map_rm_params->format_type;
    pte_buffer->mapping_info.elementBits = map_rm_params->element_bits;
    pte_buffer->mapping_info.compressionType = map_rm_params->compression_type;
    if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL)
        pte_buffer->mapping_info.mappingPageSize = page_size;

    pte_buffer->page_size = page_size;
    pte_buffer->pte_size = uvm_mmu_pte_size(tree, page_size);
    num_all_ptes = uvm_div_pow2_64(length, page_size);
@@ -341,9 +344,8 @@ static NV_STATUS map_rm_pt_range(uvm_page_tree_t *tree,
static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_gpu_map_t *ext_gpu_map)
{
    if (va_range->type == UVM_VA_RANGE_TYPE_CHANNEL) {
        if (va_range->channel.aperture == UVM_APERTURE_VID)
            return UVM_MEMBAR_GPU;
        return UVM_MEMBAR_SYS;
        return uvm_hal_downgrade_membar_type(va_range->channel.gpu_va_space->gpu,
                                             va_range->channel.aperture == UVM_APERTURE_VID);
    }

    // If there is no mem_handle, this is a sparse mapping.
@@ -353,9 +355,8 @@ static uvm_membar_t va_range_downgrade_membar(uvm_va_range_t *va_range, uvm_ext_
    if (!ext_gpu_map->mem_handle)
        return UVM_MEMBAR_GPU;

    if (ext_gpu_map->is_sysmem || ext_gpu_map->gpu != ext_gpu_map->owning_gpu)
        return UVM_MEMBAR_SYS;
    return UVM_MEMBAR_GPU;
    return uvm_hal_downgrade_membar_type(ext_gpu_map->gpu,
                                         !ext_gpu_map->is_sysmem && ext_gpu_map->gpu == ext_gpu_map->owning_gpu);
}
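Both branches above now delegate to uvm_hal_downgrade_membar_type(), which picks the membar scope from whether the memory being unmapped is local vidmem of the GPU doing the downgrade. The helper's body is not part of this hunk; a plausible minimal sketch of what the calls above assume (inferred, not the authoritative implementation) is:

// Inferred sketch only: a downgrade of GPU-local vidmem mappings can be fenced
// with a GPU-scope membar, anything else (sysmem or peer memory) needs a
// system-scope membar.
static uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem)
{
    return is_local_vidmem ? UVM_MEMBAR_GPU : UVM_MEMBAR_SYS;
}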
NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
@@ -398,9 +399,7 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,

    page_tree = &gpu_va_space->page_tables;

    // Verify that the GPU VA space supports this page size
    if ((mem_info->pageSize & page_tree->hal->page_sizes()) == 0)
        return NV_ERR_INVALID_ADDRESS;
    UVM_ASSERT(uvm_mmu_page_size_supported(page_tree, mem_info->pageSize));

    if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) {
        // We should be never called with ext_gpu_map == NULL
@@ -414,13 +413,12 @@ NV_STATUS uvm_va_range_map_rm_allocation(uvm_va_range_t *va_range,
        pt_range_vec = &va_range->channel.pt_range_vec;
    }

    if (!IS_ALIGNED(map_offset, mem_info->pageSize) ||
        map_offset + uvm_range_tree_node_size(node) > mem_info->size)
    if (map_offset + uvm_range_tree_node_size(node) > mem_info->size)
        return NV_ERR_INVALID_OFFSET;

    // Consolidate input checks for API-level callers
    if (!IS_ALIGNED(node->start, mem_info->pageSize) || !IS_ALIGNED(node->end + 1, mem_info->pageSize))
        return NV_ERR_INVALID_ADDRESS;
    UVM_ASSERT(IS_ALIGNED(node->start, mem_info->pageSize) &&
               IS_ALIGNED(node->end + 1, mem_info->pageSize) &&
               IS_ALIGNED(map_offset, mem_info->pageSize));

    status = uvm_pte_buffer_init(va_range,
                                 mapping_gpu,
@@ -845,6 +843,10 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
    uvm_ext_gpu_map_t *ext_gpu_map = NULL;
    uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
    UvmGpuMemoryInfo mem_info;
    uvm_gpu_va_space_t *gpu_va_space = uvm_gpu_va_space_get(va_space, mapping_gpu);
    NvU32 mapping_page_size;
    NvU64 alignments;
    NvU32 smallest_alignment;
    NV_STATUS status;

    uvm_assert_rwsem_locked_read(&va_space->lock);
@@ -915,12 +917,25 @@ static NV_STATUS uvm_map_external_allocation_on_gpu(uvm_va_range_t *va_range,
    if (status != NV_OK)
        goto error;

    status = uvm_va_range_map_rm_allocation(va_range,
                                            mapping_gpu,
                                            &mem_info,
                                            map_rm_params,
                                            ext_gpu_map,
                                            out_tracker);
    // Determine the proper mapping page size.
    // This will be the largest supported page size less than or equal to the
    // smallest of the base VA address, length, offset, and allocation page size
    // alignments.
    alignments = mem_info.pageSize | base | length | map_rm_params->map_offset;
    smallest_alignment = alignments & ~(alignments - 1);

    // Check that alignment bits did not get truncated.
    UVM_ASSERT(smallest_alignment);

    mapping_page_size = uvm_mmu_biggest_page_size_up_to(&gpu_va_space->page_tables, smallest_alignment);
    if (!mapping_page_size) {
        status = NV_ERR_INVALID_ADDRESS;
        goto error;
    }

    mem_info.pageSize = mapping_page_size;

    status = uvm_va_range_map_rm_allocation(va_range, mapping_gpu, &mem_info, map_rm_params, ext_gpu_map, out_tracker);
    if (status != NV_OK)
        goto error;
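The page-size selection added above relies on a standard bit trick: OR together every quantity that must be aligned and isolate the lowest set bit of the result, which gives the largest power-of-two alignment they all share. A small self-contained illustration of that arithmetic (plain C with made-up values, independent of the driver types):

#include <stdint.h>
#include <stdio.h>

// Largest power-of-two alignment shared by all inputs: OR them together and
// isolate the lowest set bit of the result.
static uint64_t common_alignment(uint64_t page_size, uint64_t base,
                                 uint64_t length, uint64_t offset)
{
    uint64_t alignments = page_size | base | length | offset;
    return alignments & ~(alignments - 1);
}

int main(void)
{
    // Example: a 2MB allocation page size, but a 64KB-aligned VA and offset
    // limit the usable mapping page size to 64KB.
    uint64_t align = common_alignment(2ull << 20, 0x7f1234560000ull,
                                      2ull << 20, 0x10000ull);
    printf("common alignment: 0x%llx\n", (unsigned long long)align); // 0x10000
    return 0;
}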
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -50,38 +50,12 @@ void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 o
|
||||
OFFSET_OUT_LOWER, HWVALUE(B0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
|
||||
}
|
||||
|
||||
// Perform an appropriate membar before a semaphore operation. Returns whether
|
||||
// the semaphore operation should include a flush.
|
||||
static bool maxwell_membar_before_semaphore(uvm_push_t *push)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
|
||||
// No MEMBAR requested, don't use a flush.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
|
||||
// By default do a MEMBAR SYS and for that we can just use flush on the
|
||||
// semaphore operation.
|
||||
return true;
|
||||
}
|
||||
|
||||
// MEMBAR GPU requested, do it on the HOST and skip the CE flush as CE
|
||||
// doesn't have this capability.
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
gpu->parent->host_hal->wait_for_idle(push);
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = maxwell_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
@ -102,7 +76,7 @@ void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va,
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = maxwell_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
@ -126,7 +100,7 @@ void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
NvU32 flush_value;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = maxwell_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(B0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
@ -221,10 +195,9 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
NvU32 pipelined_value;
|
||||
NvU32 launch_dma_src_dst_type;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool first_operation = true;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_validate(push, dst, src),
|
||||
"Memcopy validation failed in channel %s, GPU %s",
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memcopy_is_valid(push, dst, src),
|
||||
"Memcopy validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
@ -233,14 +206,14 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
launch_dma_src_dst_type = gpu->parent->ce_hal->phys_mode(push, dst, src);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
do {
|
||||
NvU32 copy_this_time = (NvU32)min(size, max_single_copy_size);
|
||||
|
||||
if (first_operation && uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
else
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
|
||||
|
||||
do {
|
||||
NvU32 copy_this_time = (NvU32)min(size, max_single_copy_size);
|
||||
|
||||
gpu->parent->ce_hal->offset_in_out(push, src.address, dst.address);
|
||||
|
||||
NV_PUSH_1U(B0B5, LINE_LENGTH_IN, copy_this_time);
|
||||
@ -255,10 +228,10 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
launch_dma_plc_mode |
|
||||
pipelined_value);
|
||||
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
dst.address += copy_this_time;
|
||||
src.address += copy_this_time;
|
||||
size -= copy_this_time;
|
||||
first_operation = false;
|
||||
} while (size > 0);
|
||||
|
||||
maxwell_membar_after_transfer(push);
|
||||
@ -266,11 +239,14 @@ void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu
|
||||
|
||||
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, size_t size)
|
||||
{
|
||||
uvm_hal_maxwell_ce_memcopy(push, uvm_gpu_address_virtual(dst_va), uvm_gpu_address_virtual(src_va), size);
|
||||
uvm_push_get_gpu(push)->parent->ce_hal->memcopy(push,
|
||||
uvm_gpu_address_virtual(dst_va),
|
||||
uvm_gpu_address_virtual(src_va),
|
||||
size);
|
||||
}
|
||||
|
||||
// Push SET_DST_PHYS mode if needed and return LAUNCH_DMA_DST_TYPE flags
|
||||
static NvU32 memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
static NvU32 maxwell_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
|
||||
{
|
||||
if (dst.is_virtual)
|
||||
return HWCONST(B0B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
|
||||
@ -290,12 +266,12 @@ static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size,
|
||||
NvU32 launch_dma_dst_type;
|
||||
NvU32 launch_dma_plc_mode;
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
|
||||
"Memset validation failed in channel %s, GPU %s",
|
||||
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_is_valid(push, dst, memset_element_size),
|
||||
"Memset validation failed in channel %s, GPU %s.\n",
|
||||
push->channel->name,
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
launch_dma_dst_type = memset_push_phys_mode(push, dst);
|
||||
launch_dma_dst_type = maxwell_memset_push_phys_mode(push, dst);
|
||||
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
|
||||
@ -322,7 +298,7 @@ static void memset_common(uvm_push_t *push, uvm_gpu_address_t dst, size_t size,
|
||||
|
||||
dst.address += memset_this_time * memset_element_size;
|
||||
size -= memset_this_time;
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
|
||||
pipelined_value = HWCONST(B0B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
|
||||
} while (size > 0);
|
||||
|
||||
maxwell_membar_after_transfer(push);
|
||||
@ -373,5 +349,6 @@ void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64
|
||||
|
||||
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size)
|
||||
{
|
||||
uvm_hal_maxwell_ce_memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
|
||||
uvm_push_get_gpu(push)->parent->ce_hal->memset_4(push, uvm_gpu_address_virtual(dst_va), value, size);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -455,7 +455,7 @@ static gfp_t sysmem_allocation_gfp_flags(int order, bool zero)
|
||||
//
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, struct mm_struct *mm, gfp_t gfp_flags)
|
||||
static NV_STATUS mem_alloc_sysmem_dma_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
{
|
||||
size_t i;
|
||||
NV_STATUS status;
|
||||
@ -500,7 +500,7 @@ error:
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, struct mm_struct *mm, gfp_t gfp_flags)
|
||||
static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, gfp_t gfp_flags)
|
||||
{
|
||||
size_t i;
|
||||
int order;
|
||||
@ -573,9 +573,9 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
|
||||
uvm_memcg_context_start(&memcg_context, mm);
|
||||
if (uvm_mem_is_sysmem_dma(mem))
|
||||
status = mem_alloc_sysmem_dma_chunks(mem, mm, gfp_flags);
|
||||
status = mem_alloc_sysmem_dma_chunks(mem, gfp_flags);
|
||||
else
|
||||
status = mem_alloc_sysmem_chunks(mem, mm, gfp_flags);
|
||||
status = mem_alloc_sysmem_chunks(mem, gfp_flags);
|
||||
|
||||
uvm_memcg_context_end(&memcg_context);
|
||||
return status;
|
||||
@ -584,14 +584,6 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_protected);
|
||||
}
|
||||
|
||||
static const char *mem_physical_source(uvm_mem_t *mem)
|
||||
{
|
||||
if (uvm_mem_is_vidmem(mem))
|
||||
return uvm_gpu_name(mem->backing_gpu);
|
||||
|
||||
return "CPU";
|
||||
}
|
||||
|
||||
NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_global_processor_mask_t *mask)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
@ -617,6 +609,7 @@ NV_STATUS uvm_mem_map_kernel(uvm_mem_t *mem, const uvm_global_processor_mask_t *
|
||||
NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU64 physical_size;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_protected = false;
|
||||
|
||||
@ -637,8 +630,8 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
|
||||
UVM_ASSERT(mem->chunk_size > 0);
|
||||
|
||||
mem->physical_allocation_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = mem->physical_allocation_size / mem->chunk_size;
|
||||
physical_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = physical_size / mem->chunk_size;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_protected);
|
||||
if (status != NV_OK)
|
||||
@ -665,7 +658,7 @@ static NV_STATUS mem_init_user_mapping(uvm_mem_t *mem, uvm_va_space_t *user_va_s
|
||||
}
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED((NvU64)user_addr, mem->chunk_size));
|
||||
UVM_ASSERT(mem->physical_allocation_size == mem->size);
|
||||
UVM_ASSERT(uvm_mem_physical_size(mem) == mem->size);
|
||||
|
||||
mem->user = uvm_kvmalloc_zero(sizeof(*mem->user));
|
||||
if (mem->user == NULL)
|
||||
@ -691,7 +684,7 @@ static void mem_deinit_user_mapping(uvm_mem_t *mem)
|
||||
|
||||
static NvU64 reserved_gpu_va(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
{
|
||||
UVM_ASSERT(mem->kernel.range_alloc.aligned_start + mem->physical_allocation_size < gpu->parent->uvm_mem_va_size);
|
||||
UVM_ASSERT(mem->kernel.range_alloc.aligned_start + uvm_mem_physical_size(mem) < gpu->parent->uvm_mem_va_size);
|
||||
|
||||
return gpu->parent->uvm_mem_va_base + mem->kernel.range_alloc.aligned_start;
|
||||
}
|
||||
@ -709,7 +702,7 @@ static struct page *mem_cpu_page(uvm_mem_t *mem, NvU64 offset)
|
||||
static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
|
||||
{
|
||||
struct page **pages = mem->sysmem.pages;
|
||||
size_t num_pages = mem->physical_allocation_size / PAGE_SIZE;
|
||||
size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
|
||||
pgprot_t prot = PAGE_KERNEL;
|
||||
|
||||
UVM_ASSERT(uvm_mem_is_sysmem(mem));
|
||||
@ -743,7 +736,7 @@ static NV_STATUS mem_map_cpu_to_vidmem_kernel(uvm_mem_t *mem)
|
||||
{
|
||||
struct page **pages;
|
||||
size_t num_chunk_pages = mem->chunk_size / PAGE_SIZE;
|
||||
size_t num_pages = mem->physical_allocation_size / PAGE_SIZE;
|
||||
size_t num_pages = uvm_mem_physical_size(mem) / PAGE_SIZE;
|
||||
size_t page_index;
|
||||
size_t chunk_index;
|
||||
|
||||
@ -798,7 +791,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struc
|
||||
// compound pages in order to be able to use vm_insert_page on them. This
|
||||
// is not currently being exercised because the only allocations using this
|
||||
// are semaphore pools (which typically use a single page).
|
||||
for (offset = 0; offset < mem->physical_allocation_size; offset += PAGE_SIZE) {
|
||||
for (offset = 0; offset < uvm_mem_physical_size(mem); offset += PAGE_SIZE) {
|
||||
int ret = vm_insert_page(vma, (unsigned long)mem->user->addr + offset, mem_cpu_page(mem, offset));
|
||||
if (ret) {
|
||||
UVM_ASSERT_MSG(ret == -ENOMEM, "ret: %d\n", ret);
|
||||
@ -810,7 +803,7 @@ static NV_STATUS mem_map_cpu_to_sysmem_user(uvm_mem_t *mem, struct vm_area_struc
|
||||
return NV_OK;
|
||||
|
||||
error:
|
||||
unmap_mapping_range(&mem->user->va_space->mapping, (size_t)mem->user->addr, mem->physical_allocation_size, 1);
|
||||
unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -819,7 +812,7 @@ void uvm_mem_unmap_cpu_user(uvm_mem_t *mem)
|
||||
if (!uvm_mem_mapped_on_cpu_user(mem))
|
||||
return;
|
||||
|
||||
unmap_mapping_range(&mem->user->va_space->mapping, (size_t)mem->user->addr, mem->physical_allocation_size, 1);
|
||||
unmap_mapping_range(mem->user->va_space->mapping, (size_t)mem->user->addr, uvm_mem_physical_size(mem), 1);
|
||||
mem_clear_mapped_on_cpu_user(mem);
|
||||
mem_deinit_user_mapping(mem);
|
||||
}
|
||||
@ -959,21 +952,17 @@ static uvm_gpu_phys_address_t mem_gpu_physical_sysmem(uvm_mem_t *mem, uvm_gpu_t
|
||||
return uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr + offset % mem->chunk_size);
|
||||
}
|
||||
|
||||
static bool mem_check_range(uvm_mem_t *mem, NvU64 offset, NvU64 size)
|
||||
bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size)
|
||||
{
|
||||
UVM_ASSERT(size != 0);
|
||||
UVM_ASSERT_MSG(UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size),
|
||||
"offset %llu size %llu page_size %u\n",
|
||||
offset,
|
||||
size,
|
||||
mem->chunk_size);
|
||||
UVM_ASSERT_MSG(offset / mem->chunk_size < mem->chunks_count, "offset %llu\n", offset);
|
||||
return true;
|
||||
UVM_ASSERT((offset + size) <= uvm_mem_physical_size(mem));
|
||||
|
||||
return UVM_ALIGN_DOWN(offset, mem->chunk_size) == UVM_ALIGN_DOWN(offset + size - 1, mem->chunk_size);
|
||||
}
|
||||
|
||||
uvm_gpu_phys_address_t uvm_mem_gpu_physical(uvm_mem_t *mem, uvm_gpu_t *gpu, NvU64 offset, NvU64 size)
|
||||
{
|
||||
UVM_ASSERT(mem_check_range(mem, offset, size));
|
||||
UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));
|
||||
|
||||
if (uvm_mem_is_vidmem(mem)) {
|
||||
UVM_ASSERT(uvm_mem_is_local_vidmem(mem, gpu));
|
||||
@ -990,7 +979,7 @@ uvm_gpu_address_t uvm_mem_gpu_address_copy(uvm_mem_t *mem, uvm_gpu_t *accessing_
|
||||
size_t chunk_offset;
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
UVM_ASSERT(mem_check_range(mem, offset, size));
|
||||
UVM_ASSERT(uvm_mem_is_physically_contiguous(mem, offset, size));
|
||||
|
||||
if (uvm_mem_is_sysmem(mem) || uvm_mem_is_local_vidmem(mem, accessing_gpu))
|
||||
return uvm_mem_gpu_address_physical(mem, accessing_gpu, offset, size);
|
||||
@ -1024,13 +1013,8 @@ static NvU64 mem_pte_maker(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
|
||||
|
||||
static void mem_unmap_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu, uvm_page_table_range_vec_t **range_vec)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_membar_t tlb_membar = UVM_MEMBAR_SYS;
|
||||
|
||||
if (uvm_mem_is_local_vidmem(mem, gpu))
|
||||
tlb_membar = UVM_MEMBAR_GPU;
|
||||
|
||||
status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
|
||||
uvm_membar_t tlb_membar = uvm_hal_downgrade_membar_type(gpu, uvm_mem_is_local_vidmem(mem, gpu));
|
||||
NV_STATUS status = uvm_page_table_range_vec_clear_ptes(*range_vec, tlb_membar);
|
||||
if (status != NV_OK)
|
||||
UVM_ERR_PRINT("Clearing PTEs failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
|
||||
@ -1054,22 +1038,19 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
.attrs = attrs
|
||||
};
|
||||
|
||||
if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
|
||||
return NV_ERR_OUT_OF_RANGE;
|
||||
|
||||
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
|
||||
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
|
||||
|
||||
status = uvm_page_table_range_vec_create(tree,
|
||||
gpu_va,
|
||||
mem->physical_allocation_size,
|
||||
uvm_mem_physical_size(mem),
|
||||
page_size,
|
||||
pmm_flags,
|
||||
range_vec);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init page mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
|
||||
gpu_va,
|
||||
gpu_va + mem->physical_allocation_size,
|
||||
gpu_va + uvm_mem_physical_size(mem),
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
return status;
|
||||
@ -1079,7 +1060,7 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to write PTEs for mapping at [0x%llx, 0x%llx): %s, GPU %s\n",
|
||||
gpu_va,
|
||||
gpu_va + mem->physical_allocation_size,
|
||||
gpu_va + uvm_mem_physical_size(mem),
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
goto error;
|
||||
@ -1098,7 +1079,7 @@ static NV_STATUS mem_init_gpu_kernel_range(uvm_mem_t *mem)
|
||||
return NV_OK;
|
||||
|
||||
return uvm_range_allocator_alloc(&g_free_ranges,
|
||||
mem->physical_allocation_size,
|
||||
uvm_mem_physical_size(mem),
|
||||
mem->chunk_size,
|
||||
&mem->kernel.range_alloc);
|
||||
}
|
||||
@ -1139,7 +1120,7 @@ NV_STATUS uvm_mem_map_gpu_kernel(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
gpu_va = uvm_parent_gpu_canonical_address(gpu->parent, reserved_gpu_va(mem, gpu));
|
||||
gpu_va = reserved_gpu_va(mem, gpu);
|
||||
range_vec = &mem->kernel.range_vecs[uvm_global_id_gpu_index(gpu->global_id)];
|
||||
|
||||
status = mem_map_gpu(mem, gpu, gpu_va, &gpu->address_space_tree, &attrs, range_vec);
|
||||
@ -1165,6 +1146,7 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
|
||||
NV_STATUS status;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
uvm_page_table_range_vec_t **range_vec;
|
||||
NvU64 gpu_va;
|
||||
|
||||
UVM_ASSERT(mem_can_be_mapped_on_gpu_user(mem, gpu));
|
||||
uvm_assert_rwsem_locked(&user_va_space->lock);
|
||||
@ -1172,6 +1154,10 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
|
||||
if (uvm_mem_mapped_on_gpu_user(mem, gpu))
|
||||
return NV_OK;
|
||||
|
||||
gpu_va = (NvU64)user_addr;
|
||||
if (!uvm_gpu_can_address(gpu, gpu_va, mem->size))
|
||||
return NV_ERR_OUT_OF_RANGE;
|
||||
|
||||
status = uvm_mem_map_gpu_phys(mem, gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -1183,7 +1169,7 @@ NV_STATUS uvm_mem_map_gpu_user(uvm_mem_t *mem,
|
||||
gpu_va_space = uvm_gpu_va_space_get(mem->user->va_space, gpu);
|
||||
range_vec = &mem->user->range_vecs[uvm_global_id_gpu_index(gpu->global_id)];
|
||||
|
||||
status = mem_map_gpu(mem, gpu, (NvU64)mem->user->addr, &gpu_va_space->page_tables, attrs, range_vec);
|
||||
status = mem_map_gpu(mem, gpu, gpu_va, &gpu_va_space->page_tables, attrs, range_vec);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
|
@@ -163,9 +163,6 @@ struct uvm_mem_struct

    uvm_gpu_t *dma_owner;

    // Size of the physical chunks.
    NvU32 chunk_size;

    union
    {
        struct
@@ -194,12 +191,12 @@ struct uvm_mem_struct
    // Count of chunks (vidmem) or CPU pages (sysmem) above
    size_t chunks_count;

    // Size of each physical chunk (vidmem) or CPU page (sysmem)
    NvU32 chunk_size;

    // Size of the allocation
    NvU64 size;

    // Size of the physical allocation backing
    NvU64 physical_allocation_size;

    uvm_mem_user_mapping_t *user;

    // Information specific to allocations mapped in UVM internal VA space.
@@ -235,6 +232,20 @@ NV_STATUS uvm_mem_translate_gpu_attributes(const UvmGpuMappingAttributes *attrs,

uvm_chunk_sizes_mask_t uvm_mem_kernel_chunk_sizes(uvm_gpu_t *gpu);

// Size of all the physical allocations backing the given memory.
static inline NvU64 uvm_mem_physical_size(const uvm_mem_t *mem)
{
    NvU64 physical_size = mem->chunks_count * mem->chunk_size;

    UVM_ASSERT(mem->size <= physical_size);

    return physical_size;
}

// Returns true if the memory is physically contiguous in the
// [offset, offset + size) interval.
bool uvm_mem_is_physically_contiguous(uvm_mem_t *mem, NvU64 offset, NvU64 size);
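With physical_allocation_size removed from the struct, the size of the backing is now derived on demand from the chunk count and chunk size; the rounding that the stored field used to capture is implied because chunks_count is computed from the aligned-up size at allocation time. A standalone sketch of the same arithmetic (a hypothetical plain-C mirror of the fields, not the driver structure itself):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Hypothetical mirror of the relevant uvm_mem fields.
struct toy_mem {
    size_t   chunks_count;  // number of backing chunks or CPU pages
    uint32_t chunk_size;    // size of each chunk, a power of two
    uint64_t size;          // requested (logical) allocation size
};

static uint64_t toy_physical_size(const struct toy_mem *mem)
{
    uint64_t physical = (uint64_t)mem->chunks_count * mem->chunk_size;
    assert(mem->size <= physical);  // backing is never smaller than the request
    return physical;
}

int main(void)
{
    // A 200KB request backed by 64KB chunks rounds up to 4 chunks = 256KB.
    struct toy_mem mem = { .chunks_count = 4, .chunk_size = 64 * 1024, .size = 200 * 1024 };
    printf("physical backing: %llu bytes\n", (unsigned long long)toy_physical_size(&mem));
    return 0;
}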
|
||||
// Allocate memory according to the given allocation parameters.
|
||||
//
|
||||
// In the case of sysmem, the memory is immediately physically accessible from
|
||||
|
@ -62,7 +62,7 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
|
||||
|
||||
verif_size = UVM_ALIGN_UP(verif_size, sizeof(*sys_verif));
|
||||
|
||||
UVM_ASSERT(mem->physical_allocation_size >= verif_size);
|
||||
UVM_ASSERT(uvm_mem_physical_size(mem) >= verif_size);
|
||||
UVM_ASSERT(verif_size >= sizeof(*sys_verif));
|
||||
|
||||
TEST_NV_CHECK_GOTO(__alloc_map_sysmem(verif_size, gpu, &sys_mem), done);
|
||||
@ -185,7 +185,7 @@ static NV_STATUS test_map_gpu(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
|
||||
gpu_va = uvm_mem_get_gpu_va_kernel(mem, gpu);
|
||||
TEST_CHECK_RET(gpu_va >= gpu->parent->uvm_mem_va_base);
|
||||
TEST_CHECK_RET(gpu_va + mem->physical_allocation_size <= gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size);
|
||||
TEST_CHECK_RET(gpu_va + uvm_mem_physical_size(mem) <= gpu->parent->uvm_mem_va_base + gpu->parent->uvm_mem_va_size);
|
||||
|
||||
// Mapping if already mapped is OK
|
||||
TEST_NV_CHECK_RET(uvm_mem_map_gpu_kernel(mem, gpu));
|
||||
@ -370,6 +370,7 @@ static NV_STATUS test_all(uvm_va_space_t *va_space)
|
||||
static const int max_supported_page_sizes = 4 + 1;
|
||||
int i;
|
||||
|
||||
|
||||
gpu_count = uvm_processor_mask_get_gpu_count(&va_space->registered_gpus);
|
||||
|
||||
// +1 for the CPU
|
||||
|
@ -86,7 +86,8 @@ static NV_STATUS block_migrate_map_mapped_pages(uvm_va_block_t *va_block,
|
||||
// Only map those pages that are not already mapped on destination
|
||||
for_each_va_block_unset_page_in_region_mask(page_index, pages_mapped_on_destination, region) {
|
||||
prot = uvm_va_block_page_compute_highest_permission(va_block, dest_id, page_index);
|
||||
UVM_ASSERT(prot != UVM_PROT_NONE);
|
||||
if (prot == UVM_PROT_NONE)
|
||||
continue;
|
||||
|
||||
if (va_block_context->mask_by_prot[prot - 1].count++ == 0)
|
||||
uvm_page_mask_zero(&va_block_context->mask_by_prot[prot - 1].page_mask);
|
||||
@ -206,7 +207,17 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
NV_STATUS status, tracker_status = NV_OK;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, region));
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
status = uvm_hmm_va_block_migrate_locked(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
dest_id,
|
||||
region,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
|
||||
}
|
||||
else {
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
|
||||
@ -229,6 +240,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
NULL,
|
||||
UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
|
||||
}
|
||||
}
|
||||
|
||||
if (status == NV_OK && mode == UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP) {
|
||||
// block_migrate_add_mappings will acquire the work from the above
|
||||
@ -316,7 +328,8 @@ static bool migration_should_do_cpu_preunmap(uvm_va_space_t *va_space,
|
||||
// read-duplication is enabled in the VA range. This is because, when migrating
|
||||
// read-duplicated VA blocks, the source processor doesn't need to be unmapped
|
||||
// (though it may need write access revoked).
|
||||
static bool va_range_should_do_cpu_preunmap(uvm_va_policy_t *policy, uvm_va_space_t *va_space)
|
||||
static bool va_range_should_do_cpu_preunmap(const uvm_va_policy_t *policy,
|
||||
uvm_va_space_t *va_space)
|
||||
{
|
||||
return !uvm_va_policy_is_read_duplicate(policy, va_space);
|
||||
}
|
||||
@ -406,7 +419,7 @@ static void preunmap_multi_block(uvm_va_range_t *va_range,
|
||||
}
|
||||
|
||||
if (num_unmap_pages > 0)
|
||||
unmap_mapping_range(&va_range->va_space->mapping, start, end - start + 1, 1);
|
||||
unmap_mapping_range(va_range->va_space->mapping, start, end - start + 1, 1);
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
|
||||
@ -524,6 +537,17 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
NV_STATUS status = NV_OK;
|
||||
bool skipped_migrate = false;
|
||||
|
||||
if (!first_va_range) {
|
||||
// For HMM, we iterate over va_blocks since there is no va_range.
|
||||
return uvm_hmm_migrate_ranges(va_space,
|
||||
va_block_context,
|
||||
base,
|
||||
length,
|
||||
dest_id,
|
||||
mode,
|
||||
out_tracker);
|
||||
}
|
||||
|
||||
UVM_ASSERT(first_va_range == uvm_va_space_iter_first(va_space, base, base));
|
||||
|
||||
va_range_last = NULL;
|
||||
@ -594,10 +618,10 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
NvU64 length,
|
||||
uvm_processor_id_t dest_id,
|
||||
NvU32 migrate_flags,
|
||||
uvm_va_range_t *first_va_range,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_range_t *first_va_range = uvm_va_space_iter_first(va_space, base, base);
|
||||
uvm_va_block_context_t *va_block_context;
|
||||
bool do_mappings;
|
||||
bool do_two_passes;
|
||||
@ -606,9 +630,6 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
if (!first_va_range || first_va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
// If the GPU has its memory disabled, just skip the migration and let
|
||||
// faults take care of things.
|
||||
if (!uvm_va_space_processor_has_memory(va_space, dest_id))
|
||||
@ -616,6 +637,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
va_block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!va_block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
@ -638,7 +660,9 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
|
||||
// 1- Transfer all VA blocks (do not add mappings)
|
||||
// 2- Go block by block reexecuting the transfer (in case someone moved it
|
||||
// since the first pass), and adding the mappings.
|
||||
is_single_block = is_migration_single_block(first_va_range, base, length);
|
||||
//
|
||||
// For HMM (!first_va_range), we always do a single pass.
|
||||
is_single_block = !first_va_range || is_migration_single_block(first_va_range, base, length);
|
||||
do_mappings = UVM_ID_IS_GPU(dest_id) || !(migrate_flags & UVM_MIGRATE_FLAG_SKIP_CPU_MAP);
|
||||
do_two_passes = do_mappings && !is_single_block;
|
||||
|
||||
@ -854,6 +878,7 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
|
||||
if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
|
||||
UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
|
||||
UVM_INFO_PRINT("TEMP\n");
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
@ -916,6 +941,9 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
params->length,
|
||||
(dest_gpu ? dest_gpu->id : UVM_ID_CPU),
|
||||
params->flags,
|
||||
uvm_va_space_iter_first(va_space,
|
||||
params->base,
|
||||
params->base),
|
||||
tracker_ptr);
|
||||
}
|
||||
else if (status == NV_WARN_NOTHING_TO_DO) {
|
||||
@ -1029,10 +1057,26 @@ NV_STATUS uvm_api_migrate_range_group(UVM_MIGRATE_RANGE_GROUP_PARAMS *params, st
|
||||
NvU64 start = rgr->node.start;
|
||||
NvU64 length = rgr->node.end - rgr->node.start + 1;
|
||||
|
||||
if (gpu && !uvm_gpu_can_address(gpu, start, length))
|
||||
if (gpu && !uvm_gpu_can_address(gpu, start, length)) {
|
||||
status = NV_ERR_OUT_OF_RANGE;
|
||||
else
|
||||
status = uvm_migrate(va_space, mm, start, length, dest_id, migrate_flags, &local_tracker);
|
||||
}
|
||||
else {
|
||||
uvm_va_range_t *first_va_range = uvm_va_space_iter_first(va_space, start, start);
|
||||
|
||||
if (!first_va_range || first_va_range->type != UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
status = NV_ERR_INVALID_ADDRESS;
|
||||
goto done;
|
||||
}
|
||||
|
||||
status = uvm_migrate(va_space,
|
||||
mm,
|
||||
start,
|
||||
length,
|
||||
dest_id,
|
||||
migrate_flags,
|
||||
first_va_range,
|
||||
&local_tracker);
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
@ -727,16 +727,19 @@ error:
|
||||
//
|
||||
static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t location)
|
||||
{
|
||||
bool should_location_be_vidmem;
|
||||
UVM_ASSERT(tree->gpu != NULL);
|
||||
UVM_ASSERT_MSG((location == UVM_APERTURE_VID) ||
|
||||
(location == UVM_APERTURE_SYS) ||
|
||||
(location == UVM_APERTURE_DEFAULT),
|
||||
"Invalid location %s (%d)\n", uvm_aperture_string(location), (int)location);
|
||||
|
||||
// The tree must be explicitly initialized in vidmem when in SR-IOV heavy.
|
||||
// The only exception are "fake" GPUs used during page tree testing, which
|
||||
// can be identified by having no channel manager.
|
||||
if ((tree->gpu->channel_manager != NULL) && uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu))
|
||||
should_location_be_vidmem = uvm_gpu_is_virt_mode_sriov_heavy(tree->gpu);
|
||||
|
||||
// The page tree of a "fake" GPU used during page tree testing can be in
|
||||
// sysmem even if should_location_be_vidmem is true. A fake GPU can be
|
||||
// identified by having no channel manager.
|
||||
if ((tree->gpu->channel_manager != NULL) && should_location_be_vidmem)
|
||||
UVM_ASSERT(location == UVM_APERTURE_VID);
|
||||
|
||||
if (location == UVM_APERTURE_DEFAULT) {
|
||||
@ -874,6 +877,7 @@ void uvm_page_tree_put_ptes_async(uvm_page_tree_t *tree, uvm_page_table_range_t
|
||||
// traverse until we hit an in-use page, or the root
|
||||
while (dir->host_parent != NULL && dir->ref_count == 0) {
|
||||
uvm_page_directory_t *parent = dir->host_parent;
|
||||
uvm_membar_t this_membar;
|
||||
|
||||
if (free_count == 0) {
|
||||
|
||||
@ -902,10 +906,9 @@ void uvm_page_tree_put_ptes_async(uvm_page_tree_t *tree, uvm_page_table_range_t
|
||||
|
||||
invalidate_depth = dir->host_parent->depth;
|
||||
|
||||
// If any of the pointed to PDEs were in sysmem then a SYS membar is
|
||||
// required after the TLB invalidate.
|
||||
if (dir->phys_alloc.addr.aperture == UVM_APERTURE_SYS)
|
||||
membar_after_invalidate = UVM_MEMBAR_SYS;
|
||||
// Take the membar with the widest scope of any of the pointed-to PDEs
|
||||
this_membar = uvm_hal_downgrade_membar_type(tree->gpu, dir->phys_alloc.addr.aperture == UVM_APERTURE_VID);
|
||||
membar_after_invalidate = max(membar_after_invalidate, this_membar);
|
||||
|
||||
// If any of the cleared PDEs were in sysmem then a SYS membar is
|
||||
// required after the clears and before the TLB invalidate.
|
||||
@ -978,7 +981,8 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
{
|
||||
uvm_mmu_mode_hal_t *hal = tree->hal;
|
||||
|
||||
// bit index just beyond the most significant bit used to index the current entry
|
||||
// bit index just beyond the most significant bit used to index the current
|
||||
// entry
|
||||
NvU32 addr_bit_shift = hal->num_va_bits();
|
||||
|
||||
// track depth upon which the invalidate occured
|
||||
@ -997,19 +1001,28 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
// ensure that the caller has specified a valid page size
|
||||
UVM_ASSERT((page_size & hal->page_sizes()) != 0);
|
||||
|
||||
// This algorithm will work with unaligned ranges, but the caller's intent is unclear
|
||||
UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0, "start 0x%llx size 0x%zx page_size 0x%x",
|
||||
start, (size_t)size, page_size);
|
||||
// This algorithm will work with unaligned ranges, but the caller's intent
|
||||
// is unclear
|
||||
UVM_ASSERT_MSG(start % page_size == 0 && size % page_size == 0,
|
||||
"start 0x%llx size 0x%zx page_size 0x%x\n",
|
||||
start,
|
||||
(size_t)size,
|
||||
page_size);
|
||||
|
||||
// The GPU should be capable of addressing the passed range
|
||||
if (tree->type == UVM_PAGE_TREE_TYPE_USER)
|
||||
UVM_ASSERT(uvm_gpu_can_address(tree->gpu, start, size));
|
||||
else
|
||||
UVM_ASSERT(uvm_gpu_can_address_kernel(tree->gpu, start, size));
|
||||
|
||||
while (true) {
|
||||
|
||||
// index of the entry, for the first byte of the range, within its containing directory
|
||||
// index of the entry, for the first byte of the range, within its
|
||||
// containing directory
|
||||
NvU32 start_index;
|
||||
|
||||
// index of the entry, for the last byte of the range, within its containing directory
|
||||
// index of the entry, for the last byte of the range, within its
|
||||
// containing directory
|
||||
NvU32 end_index;
|
||||
|
||||
// pointer to PDE/PTE
|
||||
@ -2280,9 +2293,9 @@ static NV_STATUS create_dynamic_sysmem_mapping(uvm_gpu_t *gpu)
|
||||
UVM_ASSERT(gpu->parent->flat_sysmem_va_base != 0);
|
||||
|
||||
// The DMA addressable window is the maximum system physical memory
|
||||
// addressable by the GPU (this limit is 128TB in Pascal-Ampere). The
|
||||
// virtual mapping to sysmem is linear, so its size matches that of the
|
||||
// physical address space.
|
||||
// addressable by the GPU (this limit is 128TB in Pascal-Ada). The virtual
|
||||
// mapping to sysmem is linear, so its size matches that of the physical
|
||||
// address space.
|
||||
flat_sysmem_va_size = gpu->parent->dma_addressable_limit + 1 - gpu->parent->dma_addressable_start;
|
||||
|
||||
// The optimal mapping granularity is dependent on multiple factors:
|
||||
@ -2351,7 +2364,7 @@ NV_STATUS uvm_mmu_sysmem_map(uvm_gpu_t *gpu, NvU64 pa, NvU64 size)
|
||||
// because in the common case the VA block lock is held.
|
||||
pmm_flags = UVM_PMM_ALLOC_FLAGS_NONE;
|
||||
|
||||
sysmem_mapping->base = uvm_parent_gpu_canonical_address(gpu->parent, virtual_address.address);
|
||||
sysmem_mapping->base = virtual_address.address;
|
||||
|
||||
status = create_identity_mapping(gpu,
|
||||
sysmem_mapping->base,
|
||||
|
@ -36,7 +36,9 @@
|
||||
#define UVM_PAGE_SIZE_AGNOSTIC 0
|
||||
|
||||
// Memory layout of UVM's kernel VA space.
|
||||
// The following memory regions are not to scale.
|
||||
// The following memory regions are not to scale. The memory layout is linear,
|
||||
// i.e., no canonical form address conversion.
|
||||
//
|
||||
// Hopper:
|
||||
// +----------------+ 128PB
|
||||
// | |
|
||||
@ -57,7 +59,7 @@
|
||||
// | |
|
||||
// +----------------+ 0 (rm_va_base)
|
||||
//
|
||||
// Pascal-Ampere:
|
||||
// Pascal-Ada:
|
||||
// +----------------+ 512TB
|
||||
// | |
|
||||
// | (not used) |
|
||||
@@ -592,13 +594,18 @@ static bool uvm_mmu_page_size_supported(uvm_page_tree_t *tree, NvU32 page_size)

static NvU32 uvm_mmu_biggest_page_size_up_to(uvm_page_tree_t *tree, NvU32 max_page_size)
{
    NvU32 gpu_page_sizes = tree->hal->page_sizes();
    NvU32 smallest_gpu_page_size = gpu_page_sizes & ~(gpu_page_sizes - 1);
    NvU32 page_sizes;
    NvU32 page_size;

    UVM_ASSERT_MSG(is_power_of_2(max_page_size), "0x%x\n", max_page_size);

    if (max_page_size < smallest_gpu_page_size)
        return 0;

    // Calculate the supported page sizes that are not larger than the max
    page_sizes = tree->hal->page_sizes() & (max_page_size | (max_page_size - 1));
    page_sizes = gpu_page_sizes & (max_page_size | (max_page_size - 1));

    // And pick the biggest one of them
    page_size = 1 << __fls(page_sizes);
|
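The masking trick in uvm_mmu_biggest_page_size_up_to() keeps only the supported page sizes that do not exceed max_page_size (max_page_size | (max_page_size - 1) is a mask of all bits up to and including the max) and then takes the highest remaining bit. A minimal user-space sketch of the same trick; the supported-size mask below is an example, not the HAL's real page_sizes():

#include <stdint.h>
#include <stdio.h>

// Hypothetical supported page sizes: 4K, 64K, 2M, 512M (one bit per size).
#define SUPPORTED_PAGE_SIZES ((1u << 12) | (1u << 16) | (1u << 21) | (1u << 29))

// Return the biggest supported page size <= max_page_size, or 0 if none fits.
// max_page_size must be a power of two, mirroring the UVM_ASSERT in the patch.
static uint32_t biggest_page_size_up_to(uint32_t supported, uint32_t max_page_size)
{
    uint32_t smallest = supported & ~(supported - 1);   // lowest set bit
    uint32_t page_sizes;

    if (max_page_size < smallest)
        return 0;

    // Keep only the sizes that are <= max_page_size.
    page_sizes = supported & (max_page_size | (max_page_size - 1));

    // The highest remaining bit is the answer (__fls equivalent).
    return 1u << (31 - __builtin_clz(page_sizes));
}

int main(void)
{
    printf("0x%x\n", biggest_page_size_up_to(SUPPORTED_PAGE_SIZES, 1u << 20)); // 0x10000 (64K)
    printf("0x%x\n", biggest_page_size_up_to(SUPPORTED_PAGE_SIZES, 1u << 21)); // 0x200000 (2M)
    return 0;
}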
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -305,18 +305,25 @@ static NV_STATUS test_page_tree_init(uvm_gpu_t *gpu, NvU32 big_page_size, uvm_pa
return uvm_page_tree_init(gpu, NULL, UVM_PAGE_TREE_TYPE_USER, big_page_size, UVM_APERTURE_SYS, tree);
}

static NV_STATUS test_page_tree_init_kernel(uvm_gpu_t *gpu, NvU32 big_page_size, uvm_page_tree_t *tree)
{
return uvm_page_tree_init(gpu, NULL, UVM_PAGE_TREE_TYPE_KERNEL, big_page_size, UVM_APERTURE_SYS, tree);
}

static NV_STATUS test_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_page_table_range_t *range)
{
return uvm_page_tree_get_ptes(tree,
page_size,
uvm_parent_gpu_canonical_address(tree->gpu->parent, start),
size,
UVM_PMM_ALLOC_FLAGS_NONE,
range);
uvm_mmu_mode_hal_t *hal = tree->gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);

// Maxwell GPUs don't use canonical form address even on platforms that
// support it.
start = (tree->type == UVM_PAGE_TREE_TYPE_USER) && (hal->num_va_bits() > 40) ?
uvm_parent_gpu_canonical_address(tree->gpu->parent, start) :
start;
return uvm_page_tree_get_ptes(tree, page_size, start, size, UVM_PMM_ALLOC_FLAGS_NONE, range);
}
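The conditional conversion above hinges on what canonical form means here: the virtual address is sign-extended from the MMU's VA width, which is why Maxwell-class trees (num_va_bits() of 40) skip it. A stand-alone sketch of that sign extension; the 49-bit width is an assumption for illustration, not taken from the HAL:

#include <stdint.h>
#include <stdio.h>

// Sign-extend a virtual address from num_va_bits to 64 bits, the usual
// definition of "canonical form". The width is an example; in the driver it
// comes from the GPU MMU HAL (hal->num_va_bits()).
static uint64_t canonical_address(uint64_t va, unsigned num_va_bits)
{
    unsigned shift = 64 - num_va_bits;

    // Arithmetic right shift of the sign-adjusted value fills the upper bits.
    return (uint64_t)(((int64_t)(va << shift)) >> shift);
}

int main(void)
{
    // With 49 VA bits, an address with bit 48 set gets ones above it.
    printf("0x%llx\n", (unsigned long long)canonical_address(0x0001800000000000ULL, 49));
    // Addresses below the sign bit are unchanged.
    printf("0x%llx\n", (unsigned long long)canonical_address(0x100000000ULL, 49));
    return 0;
}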
|
||||
|
||||
static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
@ -324,11 +331,13 @@ static NV_STATUS test_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
uvm_page_table_range_t *single)
|
||||
{
|
||||
return uvm_page_tree_get_entry(tree,
|
||||
page_size,
|
||||
uvm_parent_gpu_canonical_address(tree->gpu->parent, start),
|
||||
UVM_PMM_ALLOC_FLAGS_NONE,
|
||||
single);
|
||||
uvm_mmu_mode_hal_t *hal = tree->gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// See comment above (test_page_tree_get_ptes)
|
||||
start = (tree->type == UVM_PAGE_TREE_TYPE_USER) && (hal->num_va_bits() > 40) ?
|
||||
uvm_parent_gpu_canonical_address(tree->gpu->parent, start) :
|
||||
start;
|
||||
return uvm_page_tree_get_entry(tree, page_size, start, UVM_PMM_ALLOC_FLAGS_NONE, single);
|
||||
}
|
||||
|
||||
static NV_STATUS test_page_tree_alloc_table(uvm_page_tree_t *tree,
|
||||
@ -411,7 +420,10 @@ static NV_STATUS alloc_64k_memory_57b_va(uvm_gpu_t *gpu)
|
||||
uvm_page_table_range_t range;
|
||||
|
||||
NvLength size = 64 * 1024;
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
|
||||
// We use a kernel-type page tree to decouple the test from the CPU VA width
|
||||
// and canonical form address limits.
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, 0x100000000000000ULL, size, &range), NV_OK);
|
||||
TEST_CHECK_RET(range.entry_count == 1);
|
||||
TEST_CHECK_RET(range.table->depth == 5);
|
||||
@ -532,7 +544,7 @@ static NV_STATUS allocate_then_free_8_8_64k(uvm_gpu_t *gpu)
|
||||
NvLength start = stride * 248 + 256LL * 1024 * 1024 * 1024 + (1LL << 47);
|
||||
int i;
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, start + i * stride, size, range + i), NV_OK);
|
||||
@ -652,7 +664,7 @@ static NV_STATUS get_entire_table_4k(uvm_gpu_t *gpu)
|
||||
|
||||
NvLength size = 1 << 21;
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_4K, start, size, &range), NV_OK);
|
||||
|
||||
TEST_CHECK_RET(range.table == tree.root->entries[1]->entries[0]->entries[0]->entries[1]);
|
||||
@ -672,13 +684,13 @@ static NV_STATUS get_entire_table_512m(uvm_gpu_t *gpu)
|
||||
uvm_page_tree_t tree;
|
||||
uvm_page_table_range_t range;
|
||||
|
||||
NvU64 start = 1UL << 47;
|
||||
NvU64 start = 1UL << 48;
|
||||
NvLength size = 512UL * 512 * 1024 * 1024;
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_512M, start, size, &range), NV_OK);
|
||||
|
||||
TEST_CHECK_RET(range.table == tree.root->entries[1]->entries[0]);
|
||||
TEST_CHECK_RET(range.table == tree.root->entries[2]->entries[0]);
|
||||
TEST_CHECK_RET(range.entry_count == 512);
|
||||
TEST_CHECK_RET(range.table->depth == 2);
|
||||
TEST_CHECK_RET(range.page_size == UVM_PAGE_SIZE_512M);
|
||||
@ -701,7 +713,7 @@ static NV_STATUS split_4k_from_2m(uvm_gpu_t *gpu)
|
||||
NvU64 start = 1UL << 48;
|
||||
NvLength size = 1 << 21;
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_2M, start, size, &range_2m), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_2M, start + size, size, &range_adj), NV_OK);
|
||||
|
||||
@ -749,7 +761,7 @@ static NV_STATUS split_2m_from_512m(uvm_gpu_t *gpu)
|
||||
NvU64 start = 1UL << 48;
|
||||
NvLength size = 512UL * 1024 * 1024;
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_512M, start, size, &range_512m), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_512M, start + size, size, &range_adj), NV_OK);
|
||||
|
||||
@ -1025,7 +1037,7 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
// Depth 1
|
||||
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
|
||||
|
||||
fake_tlb_invals_enable();
|
||||
|
||||
@ -1270,7 +1282,7 @@ static NV_STATUS test_range_vec(uvm_gpu_t *gpu, NvU32 big_page_size, NvU32 page_
|
||||
NvU32 i;
|
||||
NvU64 offsets[4];
|
||||
|
||||
MEM_NV_CHECK_RET(test_page_tree_init(gpu, big_page_size, &tree), NV_OK);
|
||||
MEM_NV_CHECK_RET(test_page_tree_init_kernel(gpu, big_page_size, &tree), NV_OK);
|
||||
|
||||
pde_coverage = uvm_mmu_pde_coverage(&tree, page_size);
|
||||
page_table_entries = pde_coverage / page_size;
|
||||
|
@ -39,32 +39,6 @@ void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 of
|
||||
OFFSET_OUT_LOWER, HWVALUE(C0B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
|
||||
}
|
||||
|
||||
// Perform an appropriate membar before a semaphore operation. Returns whether
|
||||
// the semaphore operation should include a flush.
|
||||
static bool pascal_membar_before_semaphore(uvm_push_t *push)
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
|
||||
// No MEMBAR requested, don't use a flush.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
|
||||
// By default do a MEMBAR SYS and for that we can just use flush on the
|
||||
// semaphore operation.
|
||||
return true;
|
||||
}
|
||||
|
||||
// MEMBAR GPU requested, do it on the HOST and skip the CE flush as CE
|
||||
// doesn't have this capability.
|
||||
gpu = uvm_push_get_gpu(push);
|
||||
gpu->parent->host_hal->wait_for_idle(push);
|
||||
gpu->parent->host_hal->membar_gpu(push);
|
||||
|
||||
return false;
|
||||
}
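The helper deleted above is superseded by the shared uvm_hal_membar_before_semaphore(), but the decision it encodes is unchanged: no membar means no flush, the default MEMBAR SYS is satisfied by the CE flush, and MEMBAR GPU has to be issued on HOST because CE cannot scope a membar to the GPU. A GPU-agnostic sketch of that decision with hypothetical stand-ins for the push flags and the host HAL:

#include <stdbool.h>
#include <stdio.h>

// Hypothetical stand-in for the push membar flags consumed by the helper.
typedef enum { MEMBAR_SYS, MEMBAR_GPU, MEMBAR_NONE } membar_kind_t;

// Hypothetical host-HAL hooks; in the driver these are the push GPU's
// host_hal->wait_for_idle() and host_hal->membar_gpu().
static void host_wait_for_idle(void) { puts("WFI"); }
static void host_membar_gpu(void)    { puts("MEMBAR_GPU"); }

// Returns whether the CE semaphore operation should set FLUSH_ENABLE.
static bool membar_before_semaphore(membar_kind_t kind)
{
    if (kind == MEMBAR_NONE)
        return false;                 // no membar requested, no flush

    if (kind == MEMBAR_SYS)
        return true;                  // default: the CE flush covers MEMBAR SYS

    // MEMBAR GPU: CE cannot do a GPU-scoped membar, so issue it on HOST
    // and skip the CE flush.
    host_wait_for_idle();
    host_membar_gpu();
    return false;
}

int main(void)
{
    printf("flush=%d\n", membar_before_semaphore(MEMBAR_SYS));
    printf("flush=%d\n", membar_before_semaphore(MEMBAR_GPU));
    return 0;
}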
|
||||
|
||||
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
@ -72,7 +46,7 @@ void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 p
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = pascal_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
@ -98,7 +72,7 @@ void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, N
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = pascal_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
@ -127,7 +101,7 @@ void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
|
||||
NvU32 launch_dma_plc_mode;
|
||||
bool use_flush;
|
||||
|
||||
use_flush = pascal_membar_before_semaphore(push);
|
||||
use_flush = uvm_hal_membar_before_semaphore(push);
|
||||
|
||||
if (use_flush)
|
||||
flush_value = HWCONST(C0B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
|
||||
|
@ -54,6 +54,9 @@ typedef enum
|
||||
// Locking: uvm_va_space: write
|
||||
UVM_PERF_EVENT_BLOCK_SHRINK,
|
||||
|
||||
// Locking: HMM uvm_va_block lock
|
||||
UVM_PERF_EVENT_BLOCK_MUNMAP,
|
||||
|
||||
// Locking: uvm_va_space: write
|
||||
UVM_PERF_EVENT_RANGE_DESTROY,
|
||||
|
||||
@ -89,6 +92,12 @@ typedef union
|
||||
uvm_va_block_t *block;
|
||||
} block_shrink;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_va_block_t *block;
|
||||
uvm_va_block_region_t region;
|
||||
} block_munmap;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_va_range_t *range;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -236,7 +236,7 @@ static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_blo
|
||||
const uvm_page_mask_t *resident_mask = NULL;
|
||||
const uvm_page_mask_t *thrashing_pages = NULL;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_block_region_t max_prefetch_region;
|
||||
NvU32 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
@ -372,7 +372,7 @@ void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint)
|
||||
{
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
|
||||
NvU32 pending_prefetch_pages;
|
||||
@ -380,9 +380,10 @@ void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
|
||||
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, va_block_context, faulted_region));
|
||||
UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, faulted_region));
|
||||
|
||||
out_hint->residency = UVM_ID_INVALID;
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -398,11 +398,13 @@ static uvm_perf_module_t g_module_thrashing;
|
||||
// Callback declaration for the performance heuristics events
|
||||
static void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
|
||||
static void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
|
||||
static void thrashing_block_munmap_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
|
||||
|
||||
static uvm_perf_module_event_callback_desc_t g_callbacks_thrashing[] = {
|
||||
{ UVM_PERF_EVENT_BLOCK_DESTROY, thrashing_block_destroy_cb },
|
||||
{ UVM_PERF_EVENT_MODULE_UNLOAD, thrashing_block_destroy_cb },
|
||||
{ UVM_PERF_EVENT_BLOCK_SHRINK , thrashing_block_destroy_cb },
|
||||
{ UVM_PERF_EVENT_BLOCK_MUNMAP , thrashing_block_munmap_cb },
|
||||
{ UVM_PERF_EVENT_MIGRATION, thrashing_event_cb },
|
||||
{ UVM_PERF_EVENT_REVOCATION, thrashing_event_cb }
|
||||
};
|
||||
@ -533,7 +535,7 @@ static void gpu_thrashing_stats_destroy(uvm_gpu_t *gpu)
|
||||
// VA space lock needs to be held
|
||||
static va_space_thrashing_info_t *va_space_thrashing_info_get_or_null(uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
// TODO: Bug 3898454: check locking requirement for UVM-HMM.
|
||||
|
||||
return uvm_perf_module_type_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
|
||||
}
|
||||
@ -689,6 +691,20 @@ void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t
|
||||
uvm_perf_thrashing_info_destroy(va_block);
|
||||
}
|
||||
|
||||
void thrashing_block_munmap_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||||
{
|
||||
uvm_va_block_t *va_block = event_data->block_munmap.block;
|
||||
uvm_va_block_region_t region = event_data->block_munmap.region;
|
||||
|
||||
UVM_ASSERT(g_uvm_perf_thrashing_enable);
|
||||
UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_MUNMAP);
|
||||
UVM_ASSERT(va_block);
|
||||
|
||||
thrashing_reset_pages_in_region(va_block,
|
||||
uvm_va_block_region_start(va_block, region),
|
||||
uvm_va_block_region_size(region));
|
||||
}
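The new thrashing_block_munmap_cb() is registered through the same descriptor table as the other callbacks (the BLOCK_MUNMAP entry added to g_callbacks_thrashing above). A generic sketch of that table-driven event dispatch pattern; the types and names below are illustrative, not the UVM perf-module API:

#include <stddef.h>
#include <stdio.h>

typedef enum { EVENT_BLOCK_DESTROY, EVENT_BLOCK_MUNMAP, EVENT_COUNT } event_t;

typedef void (*event_cb_t)(event_t id, void *data);

typedef struct { event_t id; event_cb_t cb; } callback_desc_t;

static void on_destroy(event_t id, void *data) { (void)data; printf("destroy (event %d)\n", id); }
static void on_munmap(event_t id, void *data)  { (void)data; printf("munmap (event %d)\n", id); }

// Descriptor table in the same spirit as g_callbacks_thrashing.
static const callback_desc_t callbacks[] = {
    { EVENT_BLOCK_DESTROY, on_destroy },
    { EVENT_BLOCK_MUNMAP,  on_munmap  },
};

// Invoke every callback registered for the given event.
static void notify(event_t id, void *data)
{
    for (size_t i = 0; i < sizeof(callbacks) / sizeof(callbacks[0]); i++) {
        if (callbacks[i].id == id)
            callbacks[i].cb(id, data);
    }
}

int main(void)
{
    notify(EVENT_BLOCK_MUNMAP, NULL);
    return 0;
}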
|
||||
|
||||
// Sanity checks of the thrashing tracking state
|
||||
static bool thrashing_state_checks(uvm_va_block_t *va_block,
|
||||
block_thrashing_info_t *block_thrashing,
|
||||
@ -1075,7 +1091,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
|
||||
NV_STATUS tracker_status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_processor_id_t processor_id;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
@ -1121,7 +1137,7 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl
|
||||
{
|
||||
block_thrashing_info_t *block_thrashing;
|
||||
uvm_processor_mask_t unmap_processors;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
|
||||
@ -1425,7 +1441,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_processor_id_t do_not_throttle_processor = page_thrashing->do_not_throttle_processor_id;
|
||||
uvm_processor_id_t pinned_residency = page_thrashing->pinned_residency_id;
|
||||
uvm_va_policy_t *policy;
|
||||
const uvm_va_policy_t *policy;
|
||||
uvm_processor_id_t preferred_location;
|
||||
|
||||
policy = uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
|
||||
@ -1519,7 +1535,8 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(page_thrashing->pinned_residency_id)], requester)) {
|
||||
else if (uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(pinned_residency)], requester)) {
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
UVM_ASSERT(uvm_id_equal(closest_resident_id, pinned_residency));
|
||||
|
||||
hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
|
||||
@ -1790,8 +1807,6 @@ static void thrashing_unpin_pages(struct work_struct *work)
|
||||
uvm_va_space_t *va_space = va_space_thrashing->va_space;
|
||||
uvm_va_block_context_t *va_block_context = &va_space_thrashing->pinned_pages.va_block_context;
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
// Take the VA space lock so that VA blocks don't go away during this
|
||||
// operation.
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -100,10 +100,10 @@
|
||||
// All allocated user memory root chunks are tracked in an LRU list
|
||||
// (root_chunks.va_block_used). A root chunk is moved to the tail of that list
|
||||
// whenever any of its subchunks is allocated (unpinned) by a VA block (see
|
||||
// uvm_pmm_gpu_unpin_temp()). When a root chunk is selected for eviction, it has
|
||||
// the eviction flag set (see pick_root_chunk_to_evict()). This flag affects
|
||||
// many of the PMM operations on all of the subchunks of the root chunk being
|
||||
// evicted. See usage of (root_)chunk_is_in_eviction(), in particular in
|
||||
// uvm_pmm_gpu_unpin_allocated()). When a root chunk is selected for eviction,
|
||||
// it has the eviction flag set (see pick_root_chunk_to_evict()). This flag
|
||||
// affects many of the PMM operations on all of the subchunks of the root chunk
|
||||
// being evicted. See usage of (root_)chunk_is_in_eviction(), in particular in
|
||||
// chunk_free_locked() and claim_free_chunk().
|
||||
//
|
||||
// To evict a root chunk, all of its free subchunks are pinned, then all
|
||||
@ -394,7 +394,8 @@ static bool chunk_is_root_chunk_pinned(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chun
|
||||
return chunk->suballoc->pinned_leaf_chunks > 0;
|
||||
}
|
||||
|
||||
// Pin a chunk and update its root chunk's pinned leaf chunks count if the chunk is not a root chunk
|
||||
// Pin a chunk and update its root chunk's pinned leaf chunks count if the
|
||||
// chunk is not a root chunk.
|
||||
static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
@ -406,17 +407,20 @@ static void chunk_pin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
if (chunk_is_root_chunk(chunk))
|
||||
return;
|
||||
|
||||
// For subchunks, update the pinned leaf chunks count tracked in the suballoc of the root chunk.
|
||||
// For subchunks, update the pinned leaf chunks count tracked in the
|
||||
// suballoc of the root chunk.
|
||||
chunk = &root_chunk->chunk;
|
||||
|
||||
// The passed-in subchunk is not the root chunk so the root chunk has to be split
|
||||
// The passed-in subchunk is not the root chunk so the root chunk has to be
|
||||
// split.
|
||||
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
|
||||
uvm_pmm_gpu_chunk_state_string(chunk->state));
|
||||
|
||||
chunk->suballoc->pinned_leaf_chunks++;
|
||||
}
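As the reworked comments spell out, pinning or unpinning a subchunk also adjusts pinned_leaf_chunks in the root chunk's suballoc, so whether a root chunk has any pinned leaves can be answered without walking the tree. A toy illustration of that parent-side bookkeeping with simplified stand-in structs:

#include <assert.h>
#include <stdio.h>

// Simplified stand-ins for uvm_gpu_chunk_t and its root's suballoc counter.
typedef struct chunk {
    struct chunk *root;        // NULL when this chunk is itself the root
    int pinned_leaf_chunks;    // only meaningful on a split root chunk
    int pinned;
} chunk_t;

static void chunk_pin(chunk_t *c)
{
    c->pinned = 1;
    if (c->root)                       // subchunk: account on the root
        c->root->pinned_leaf_chunks++;
}

static void chunk_unpin(chunk_t *c)
{
    c->pinned = 0;
    if (c->root) {
        assert(c->root->pinned_leaf_chunks > 0);
        c->root->pinned_leaf_chunks--;
    }
}

int main(void)
{
    chunk_t root = {0};
    chunk_t sub = { .root = &root };

    chunk_pin(&sub);
    printf("root pinned leaves: %d\n", root.pinned_leaf_chunks); // 1
    chunk_unpin(&sub);
    printf("root pinned leaves: %d\n", root.pinned_leaf_chunks); // 0
    return 0;
}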
|
||||
|
||||
// Unpin a chunk and update its root chunk's pinned leaf chunks count if the chunk is not a root chunk
|
||||
// Unpin a chunk and update its root chunk's pinned leaf chunks count if the
|
||||
// chunk is not a root chunk.
|
||||
static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_chunk_state_t new_state)
|
||||
{
|
||||
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
@ -432,10 +436,12 @@ static void chunk_unpin(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_pmm_gpu_
|
||||
if (chunk_is_root_chunk(chunk))
|
||||
return;
|
||||
|
||||
// For subchunks, update the pinned leaf chunks count tracked in the suballoc of the root chunk.
|
||||
// For subchunks, update the pinned leaf chunks count tracked in the
|
||||
// suballoc of the root chunk.
|
||||
chunk = &root_chunk->chunk;
|
||||
|
||||
// The passed-in subchunk is not the root chunk so the root chunk has to be split
|
||||
// The passed-in subchunk is not the root chunk so the root chunk has to be
|
||||
// split.
|
||||
UVM_ASSERT_MSG(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT, "chunk state %s\n",
|
||||
uvm_pmm_gpu_chunk_state_string(chunk->state));
|
||||
|
||||
@ -609,6 +615,7 @@ static NV_STATUS pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
chunk_unpin(pmm, chunks[i], UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
chunks[i]->is_referenced = false;
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
}
|
||||
|
||||
@ -653,7 +660,10 @@ static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk
|
||||
list_del_init(&chunk->list);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_unpin_temp(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
|
||||
static void gpu_unpin_temp(uvm_pmm_gpu_t *pmm,
|
||||
uvm_gpu_chunk_t *chunk,
|
||||
uvm_va_block_t *va_block,
|
||||
bool is_referenced)
|
||||
{
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
|
||||
@ -667,16 +677,26 @@ void uvm_pmm_gpu_unpin_temp(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_b
|
||||
UVM_ASSERT(chunk->va_block_page_index < uvm_va_block_num_cpu_pages(va_block));
|
||||
|
||||
chunk_unpin(pmm, chunk, UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
chunk->is_referenced = is_referenced;
|
||||
chunk->va_block = va_block;
|
||||
chunk_update_lists_locked(pmm, chunk);
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
|
||||
{
|
||||
gpu_unpin_temp(pmm, chunk, va_block, false);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block)
|
||||
{
|
||||
gpu_unpin_temp(pmm, chunk, va_block, true);
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t *tracker)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_root_chunk_t *root_chunk;
|
||||
|
||||
if (!chunk)
|
||||
return;
|
||||
@ -684,11 +704,12 @@ void uvm_pmm_gpu_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_tracker_t
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED ||
|
||||
chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED);
|
||||
|
||||
root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
|
||||
if (tracker) {
|
||||
uvm_gpu_root_chunk_t *root_chunk;
|
||||
|
||||
uvm_tracker_remove_completed(tracker);
|
||||
|
||||
root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
root_chunk_lock(pmm, root_chunk);
|
||||
|
||||
// Remove any completed entries from the root tracker to prevent it from
|
||||
@ -756,6 +777,7 @@ static bool assert_chunk_mergeable(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(child->va_block == child_va_block);
|
||||
UVM_ASSERT(child->va_block_page_index ==
|
||||
prev_child->va_block_page_index + uvm_gpu_chunk_get_size(prev_child) / PAGE_SIZE);
|
||||
UVM_ASSERT(child->is_referenced == prev_child->is_referenced);
|
||||
}
|
||||
}
|
||||
|
||||
@ -799,6 +821,7 @@ static void merge_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
UVM_ASSERT(subchunk->va_block);
|
||||
chunk->va_block = subchunk->va_block;
|
||||
chunk->va_block_page_index = subchunk->va_block_page_index;
|
||||
chunk->is_referenced = subchunk->is_referenced;
|
||||
}
|
||||
else if (child_state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
UVM_ASSERT(root_chunk->chunk.suballoc->pinned_leaf_chunks >= num_sub);
|
||||
@ -2013,8 +2036,8 @@ static NV_STATUS alloc_chunk_with_splits(uvm_pmm_gpu_t *pmm,
|
||||
UVM_PANIC();
|
||||
}
|
||||
|
||||
// Allocates a single chunk of a given size. If needed splits a chunk of bigger size
|
||||
// or, if that is not possible, allocates from PMA or evicts.
|
||||
// Allocates a single chunk of a given size. If needed, splits a chunk of
|
||||
// bigger size or, if that is not possible, allocates from PMA or evicts.
|
||||
NV_STATUS alloc_chunk(uvm_pmm_gpu_t *pmm,
|
||||
uvm_pmm_gpu_memory_type_t type,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
@ -2027,8 +2050,7 @@ NV_STATUS alloc_chunk(uvm_pmm_gpu_t *pmm,
|
||||
chunk = claim_free_chunk(pmm, type, chunk_size);
|
||||
if (chunk) {
|
||||
// A free chunk could be claimed, we are done.
|
||||
*out_chunk = chunk;
|
||||
return NV_OK;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (chunk_size == UVM_CHUNK_SIZE_MAX) {
|
||||
@ -2039,11 +2061,11 @@ NV_STATUS alloc_chunk(uvm_pmm_gpu_t *pmm,
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
*out_chunk = chunk;
|
||||
return NV_OK;
|
||||
goto out;
|
||||
}
|
||||
|
||||
// We didn't find a free chunk and we will require splits so acquire the PMM lock.
|
||||
// We didn't find a free chunk and we will require splits so acquire the
|
||||
// PMM lock.
|
||||
uvm_mutex_lock(&pmm->lock);
|
||||
|
||||
status = alloc_chunk_with_splits(pmm, type, chunk_size, flags, &chunk);
|
||||
@ -2055,6 +2077,7 @@ NV_STATUS alloc_chunk(uvm_pmm_gpu_t *pmm,
|
||||
return status;
|
||||
}
|
||||
|
||||
out:
|
||||
*out_chunk = chunk;
|
||||
|
||||
return NV_OK;
|
||||
@ -2273,7 +2296,8 @@ void free_root_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chunk_t *root_chunk, free_
|
||||
}
|
||||
|
||||
// Splits the input chunk into subchunks of the next size down. The chunk state
|
||||
// can be UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED or UVM_PMM_GPU_CHUNK_STATE_ALLOCATED.
|
||||
// can be UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED or
|
||||
// UVM_PMM_GPU_CHUNK_STATE_ALLOCATED.
|
||||
//
|
||||
// UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED: This is a split for allocation.
|
||||
//
|
||||
@ -2339,6 +2363,7 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
uvm_assert_mutex_locked(&chunk->va_block->lock);
|
||||
subchunk->va_block = chunk->va_block;
|
||||
subchunk->va_block_page_index = chunk->va_block_page_index + (i * subchunk_size) / PAGE_SIZE;
|
||||
subchunk->is_referenced = chunk->is_referenced;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2354,6 +2379,7 @@ NV_STATUS split_gpu_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED) {
|
||||
chunk->va_block = NULL;
|
||||
chunk->va_block_page_index = PAGES_PER_UVM_VA_BLOCK;
|
||||
chunk->is_referenced = false;
|
||||
}
|
||||
else if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_TEMP_PINNED) {
|
||||
// -1 for the parent chunk that is going to transition into the split state.
|
||||
@ -2511,6 +2537,8 @@ static bool try_chunk_free(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
UVM_ASSERT(chunk->state != UVM_PMM_GPU_CHUNK_STATE_ALLOCATED || !chunk->is_referenced);
|
||||
|
||||
chunk->inject_split_error = false;
|
||||
|
||||
// Chunks that are the last allocated child need to trigger a merge and are
|
||||
@ -3254,6 +3282,270 @@ NvU32 uvm_pmm_gpu_phys_to_virt(uvm_pmm_gpu_t *pmm, NvU64 phys_addr, NvU64 region
|
||||
return num_mappings;
|
||||
}
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
static uvm_pmm_gpu_t *devmem_page_to_pmm(struct page *page)
|
||||
{
|
||||
return container_of(page->pgmap, uvm_pmm_gpu_t, devmem.pagemap);
|
||||
}
|
||||
|
||||
static uvm_gpu_chunk_t *devmem_page_to_chunk_locked(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
NvU64 chunk_addr = ((NvU64)page_to_pfn(page) << PAGE_SHIFT) - pmm->devmem.pagemap.range.start;
|
||||
size_t index = chunk_addr / UVM_CHUNK_SIZE_MAX;
|
||||
uvm_gpu_chunk_t *root_chunk;
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
uvm_gpu_chunk_t *parent;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
|
||||
UVM_ASSERT(index < pmm->root_chunks.count);
|
||||
root_chunk = &pmm->root_chunks.array[index].chunk;
|
||||
UVM_ASSERT(root_chunk->address == UVM_ALIGN_DOWN(chunk_addr, UVM_CHUNK_SIZE_MAX));
|
||||
|
||||
// Find the uvm_gpu_chunk_t that corresponds to the device private struct
|
||||
// page's PFN. The loop is only 0, 1, or 2 iterations.
|
||||
for (chunk = root_chunk;
|
||||
uvm_gpu_chunk_get_size(chunk) != page_size(page);
|
||||
chunk = parent->suballoc->subchunks[index]) {
|
||||
|
||||
parent = chunk;
|
||||
UVM_ASSERT(parent->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
|
||||
UVM_ASSERT(parent->suballoc);
|
||||
|
||||
chunk_size = uvm_gpu_chunk_get_size(parent->suballoc->subchunks[0]);
|
||||
index = (size_t)uvm_div_pow2_64(chunk_addr - parent->address, chunk_size);
|
||||
UVM_ASSERT(index < num_subchunks(parent));
|
||||
}
|
||||
|
||||
UVM_ASSERT(chunk->address == chunk_addr);
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED);
|
||||
UVM_ASSERT(chunk->is_referenced);
|
||||
|
||||
return chunk;
|
||||
}
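The loop above descends from the root chunk toward the leaf whose size matches the struct page, recomputing the subchunk index at each level from the offset into the parent and the (power-of-two) subchunk size. A stand-alone sketch of that index arithmetic; the 2M/64K/4K split geometry is only an example, not the driver's actual layout:

#include <stdint.h>
#include <stdio.h>

// Index of the subchunk containing addr, given the parent's base address and
// the (power-of-two) size of each of its subchunks. This is the same
// power-of-two division that uvm_div_pow2_64() performs in the driver.
static size_t subchunk_index(uint64_t addr, uint64_t parent_base, uint64_t subchunk_size)
{
    return (size_t)((addr - parent_base) >> __builtin_ctzll(subchunk_size));
}

int main(void)
{
    // Example only: a 2M root chunk split into 64K subchunks, then into 4K.
    uint64_t root_base = 0x40000000ull;
    uint64_t addr      = root_base + (5ull << 16) + (3ull << 12);

    size_t i64k = subchunk_index(addr, root_base, 64 * 1024);           // 5
    size_t i4k  = subchunk_index(addr, root_base + (i64k << 16), 4096); // 3

    printf("64K index %zu, 4K index %zu\n", i64k, i4k);
    return 0;
}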
|
||||
|
||||
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
UVM_ASSERT(is_device_private_page(page));
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
chunk = devmem_page_to_chunk_locked(page);
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
return chunk;
|
||||
}
|
||||
|
||||
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
|
||||
UVM_ASSERT(is_device_private_page(page));
|
||||
|
||||
return gpu->id;
|
||||
}
|
||||
|
||||
static void evict_orphan_pages(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT);
|
||||
UVM_ASSERT(chunk->suballoc);
|
||||
|
||||
for (i = 0; i < num_subchunks(chunk); i++) {
|
||||
uvm_gpu_chunk_t *subchunk = chunk->suballoc->subchunks[i];
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
if (subchunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT) {
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
evict_orphan_pages(pmm, subchunk);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (subchunk->state == UVM_PMM_GPU_CHUNK_STATE_ALLOCATED && subchunk->is_referenced) {
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
uvm_hmm_pmm_gpu_evict_chunk(uvm_pmm_to_gpu(pmm), subchunk);
|
||||
continue;
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_pmm_gpu_free_orphan_pages(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if (!pmm->initialized)
|
||||
return;
|
||||
|
||||
// Scan all the root chunks looking for subchunks which are still
|
||||
// referenced. This is slow, but we only do this when unregistering a GPU
|
||||
// and is not critical for performance.
|
||||
for (i = 0; i < pmm->root_chunks.count; i++) {
|
||||
uvm_gpu_root_chunk_t *root_chunk = &pmm->root_chunks.array[i];
|
||||
|
||||
root_chunk_lock(pmm, root_chunk);
|
||||
if (root_chunk->chunk.state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT)
|
||||
evict_orphan_pages(pmm, &root_chunk->chunk);
|
||||
root_chunk_unlock(pmm, root_chunk);
|
||||
}
|
||||
}
|
||||
|
||||
static void devmem_page_free(struct page *page)
|
||||
{
|
||||
uvm_pmm_gpu_t *pmm = devmem_page_to_pmm(page);
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
page->zone_device_data = NULL;
|
||||
|
||||
// We should be calling free_chunk() except that it acquires a mutex and
|
||||
// we may be in an interrupt context where we can't do that. Instead,
|
||||
// do a lazy free. Note that we have to use a "normal" spin lock because
|
||||
// the UVM context is not available.
|
||||
spin_lock(&pmm->list_lock.lock);
|
||||
|
||||
chunk = devmem_page_to_chunk_locked(page);
|
||||
UVM_ASSERT(chunk->is_referenced);
|
||||
chunk->is_referenced = false;
|
||||
list_add_tail(&chunk->list, &pmm->root_chunks.va_block_lazy_free);
|
||||
|
||||
spin_unlock(&pmm->list_lock.lock);
|
||||
|
||||
nv_kthread_q_schedule_q_item(&gpu->parent->lazy_free_q,
|
||||
&pmm->root_chunks.va_block_lazy_free_q_item);
|
||||
}
|
||||
|
||||
// This is called by HMM when the CPU faults on a ZONE_DEVICE private entry.
|
||||
static vm_fault_t devmem_fault(struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = vmf->page->zone_device_data;
|
||||
|
||||
if (!va_space)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
return uvm_va_space_cpu_fault_hmm(va_space, vmf->vma, vmf);
|
||||
}
|
||||
|
||||
static vm_fault_t devmem_fault_entry(struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(devmem_fault(vmf));
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops uvm_pmm_devmem_ops =
|
||||
{
|
||||
.page_free = devmem_page_free,
|
||||
.migrate_to_ram = devmem_fault_entry,
|
||||
};
|
||||
|
||||
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
unsigned long size = pmm->root_chunks.count * UVM_CHUNK_SIZE_MAX;
|
||||
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
|
||||
struct resource *res;
|
||||
void *ptr;
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_hmm_is_enabled_system_wide()) {
|
||||
devmem->pagemap.owner = NULL;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
res = request_free_mem_region(&iomem_resource, size, "nvidia-uvm-hmm");
|
||||
if (IS_ERR(res)) {
|
||||
UVM_ERR_PRINT("request_free_mem_region() err %ld\n", PTR_ERR(res));
|
||||
status = errno_to_nv_status(PTR_ERR(res));
|
||||
goto err;
|
||||
}
|
||||
|
||||
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
|
||||
devmem->pagemap.range.start = res->start;
|
||||
devmem->pagemap.range.end = res->end;
|
||||
devmem->pagemap.nr_range = 1;
|
||||
devmem->pagemap.ops = &uvm_pmm_devmem_ops;
|
||||
devmem->pagemap.owner = &g_uvm_global;
|
||||
|
||||
// Numa node ID doesn't matter for ZONE_DEVICE private pages.
|
||||
ptr = memremap_pages(&devmem->pagemap, NUMA_NO_NODE);
|
||||
if (IS_ERR(ptr)) {
|
||||
UVM_ERR_PRINT("memremap_pages() err %ld\n", PTR_ERR(ptr));
|
||||
status = errno_to_nv_status(PTR_ERR(ptr));
|
||||
goto err_release;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
||||
err_release:
|
||||
release_mem_region(res->start, resource_size(res));
|
||||
err:
|
||||
devmem->pagemap.owner = NULL;
|
||||
return status;
|
||||
}
|
||||
|
||||
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
uvm_pmm_gpu_devmem_t *devmem = &pmm->devmem;
|
||||
|
||||
if (!devmem->pagemap.owner)
|
||||
return;
|
||||
|
||||
memunmap_pages(&devmem->pagemap);
|
||||
release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
|
||||
}
|
||||
|
||||
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
return (pmm->devmem.pagemap.range.start + chunk->address) >> PAGE_SHIFT;
|
||||
}
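uvm_pmm_gpu_devmem_get_pfn() and the chunk_addr computation in devmem_page_to_chunk_locked() are inverses: one offsets the chunk address into the device-private pagemap range and converts it to a PFN, the other converts a PFN back to a chunk address. A quick sketch of the round trip; the PAGE_SHIFT of 12 and the addresses are assumptions for the example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12  // example value; the kernel defines the real one

// Chunk address -> PFN inside the device-private range (get_pfn direction).
static uint64_t chunk_to_pfn(uint64_t range_start, uint64_t chunk_addr)
{
    return (range_start + chunk_addr) >> PAGE_SHIFT;
}

// PFN -> chunk address (devmem_page_to_chunk_locked direction).
static uint64_t pfn_to_chunk(uint64_t range_start, uint64_t pfn)
{
    return (pfn << PAGE_SHIFT) - range_start;
}

int main(void)
{
    uint64_t range_start = 0x100000000000ull;   // example pagemap.range.start
    uint64_t chunk_addr  = 0x6400000ull;        // example, page-aligned chunk->address

    uint64_t pfn = chunk_to_pfn(range_start, chunk_addr);
    assert(pfn_to_chunk(range_start, pfn) == chunk_addr);
    printf("pfn 0x%llx round-trips to chunk 0x%llx\n",
           (unsigned long long)pfn, (unsigned long long)chunk_addr);
    return 0;
}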
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#if !UVM_IS_CONFIG_HMM()
|
||||
static NV_STATUS devmem_init(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void devmem_deinit(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
}
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
static void process_lazy_free(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
|
||||
// Note: We can't use list_for_each_safe_entry() because we drop the lock
|
||||
// in the loop. Instead, just keep removing the first entry until the list
|
||||
// is empty.
|
||||
while (!list_empty(&pmm->root_chunks.va_block_lazy_free)) {
|
||||
chunk = list_first_entry(&pmm->root_chunks.va_block_lazy_free, uvm_gpu_chunk_t, list);
|
||||
list_del_init(&chunk->list);
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
|
||||
free_chunk(pmm, chunk);
|
||||
|
||||
uvm_spin_lock(&pmm->list_lock);
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&pmm->list_lock);
|
||||
}
|
||||
|
||||
static void process_lazy_free_entry(void *args)
|
||||
{
|
||||
UVM_ENTRY_VOID(process_lazy_free(args));
|
||||
}
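devmem_page_free() may run where the PMM mutex cannot be taken, so it only moves the chunk onto the va_block_lazy_free list under the list spinlock and schedules the queue item; process_lazy_free() then pops one entry at a time, dropping the lock around each free. A user-space sketch of that drain pattern, with a pthread mutex standing in for the spinlock and a plain singly linked list for list_head:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct node { struct node *next; int id; } node_t;

static pthread_mutex_t lazy_lock = PTHREAD_MUTEX_INITIALIZER;
static node_t *lazy_list;   // entries waiting to be freed

// Producer side: called where the real free is not allowed (cf. devmem_page_free).
static void lazy_free(node_t *n)
{
    pthread_mutex_lock(&lazy_lock);
    n->next = lazy_list;
    lazy_list = n;
    pthread_mutex_unlock(&lazy_lock);
    // (here the driver would schedule the lazy-free queue item)
}

// Consumer side: drain one entry at a time, dropping the lock around the
// expensive free, mirroring process_lazy_free().
static void drain_lazy_free(void)
{
    pthread_mutex_lock(&lazy_lock);
    while (lazy_list) {
        node_t *n = lazy_list;
        lazy_list = n->next;
        pthread_mutex_unlock(&lazy_lock);

        printf("freeing entry %d\n", n->id);   // free_chunk() equivalent
        free(n);

        pthread_mutex_lock(&lazy_lock);
    }
    pthread_mutex_unlock(&lazy_lock);
}

int main(void)
{
    for (int i = 0; i < 3; i++) {
        node_t *n = malloc(sizeof(*n));
        n->id = i;
        lazy_free(n);
    }
    drain_lazy_free();
    return 0;
}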
|
||||
|
||||
NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
@ -3279,6 +3571,8 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
|
||||
}
|
||||
INIT_LIST_HEAD(&pmm->root_chunks.va_block_used);
|
||||
INIT_LIST_HEAD(&pmm->root_chunks.va_block_unused);
|
||||
INIT_LIST_HEAD(&pmm->root_chunks.va_block_lazy_free);
|
||||
nv_kthread_q_item_init(&pmm->root_chunks.va_block_lazy_free_q_item, process_lazy_free_entry, pmm);
|
||||
|
||||
uvm_mutex_init(&pmm->lock, UVM_LOCK_ORDER_PMM);
|
||||
uvm_init_rwsem(&pmm->pma_lock, UVM_LOCK_ORDER_PMM_PMA);
|
||||
@ -3354,6 +3648,10 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
|
||||
}
|
||||
}
|
||||
|
||||
status = devmem_init(pmm);
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
|
||||
return NV_OK;
|
||||
cleanup:
|
||||
uvm_pmm_gpu_deinit(pmm);
|
||||
@ -3387,9 +3685,11 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)
|
||||
if (!pmm->initialized)
|
||||
return;
|
||||
|
||||
gpu = uvm_pmm_to_gpu(pmm);
|
||||
nv_kthread_q_flush(&gpu->parent->lazy_free_q);
|
||||
UVM_ASSERT(list_empty(&pmm->root_chunks.va_block_lazy_free));
|
||||
release_free_root_chunks(pmm);
|
||||
|
||||
gpu = uvm_pmm_to_gpu(pmm);
|
||||
if (gpu->mem_info.size != 0 && gpu_supports_pma_eviction(gpu))
|
||||
nvUvmInterfacePmaUnregisterEvictionCallbacks(pmm->pma);
|
||||
|
||||
@ -3425,6 +3725,8 @@ void uvm_pmm_gpu_deinit(uvm_pmm_gpu_t *pmm)
|
||||
|
||||
deinit_caches(pmm);
|
||||
|
||||
devmem_deinit(pmm);
|
||||
|
||||
pmm->initialized = false;
|
||||
}
|
||||
|
||||
|
@ -59,6 +59,9 @@
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_types.h"
|
||||
#include "nv_uvm_types.h"
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
#include <linux/memremap.h>
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
@ -195,7 +198,35 @@ typedef uvm_chunk_size_t uvm_chunk_sizes_mask_t;
|
||||
|
||||
typedef struct uvm_pmm_gpu_chunk_suballoc_struct uvm_pmm_gpu_chunk_suballoc_t;
|
||||
|
||||
typedef struct uvm_gpu_chunk_struct uvm_gpu_chunk_t;
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
|
||||
typedef struct uvm_pmm_gpu_struct uvm_pmm_gpu_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct dev_pagemap pagemap;
|
||||
} uvm_pmm_gpu_devmem_t;
|
||||
|
||||
// Return the GPU chunk for a given device private struct page.
|
||||
uvm_gpu_chunk_t *uvm_pmm_devmem_page_to_chunk(struct page *page);
|
||||
|
||||
// Return the GPU id for a given device private struct page.
|
||||
uvm_gpu_id_t uvm_pmm_devmem_page_to_gpu_id(struct page *page);
|
||||
|
||||
// Return the PFN of the device private struct page for the given GPU chunk.
|
||||
unsigned long uvm_pmm_gpu_devmem_get_pfn(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
||||
|
||||
// Free any orphan pages.
|
||||
// This should be called as part of removing a GPU: after all work is stopped
|
||||
// and all va_blocks have been destroyed. There normally won't be any
|
||||
// device private struct page references left but there can be cases after
|
||||
// fork() where a child process still holds a reference. This function searches
|
||||
// for pages that still have a reference and migrates the page to the GPU in
|
||||
// order to release the reference in the CPU page table.
|
||||
void uvm_pmm_gpu_free_orphan_pages(uvm_pmm_gpu_t *pmm);
|
||||
|
||||
#endif
|
||||
|
||||
struct uvm_gpu_chunk_struct
|
||||
{
|
||||
// Physical address of GPU chunk. This may be removed to save memory
|
||||
@ -226,6 +257,10 @@ struct uvm_gpu_chunk_struct
|
||||
// chunk.
|
||||
bool is_zero : 1;
|
||||
|
||||
// This flag indicates an allocated chunk is referenced by a device
|
||||
// private struct page PTE and therefore expects a page_free() callback.
|
||||
bool is_referenced : 1;
|
||||
|
||||
uvm_pmm_gpu_chunk_state_t state : order_base_2(UVM_PMM_GPU_CHUNK_STATE_COUNT + 1);
|
||||
|
||||
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
||||
@ -309,7 +344,7 @@ typedef struct
|
||||
atomic64_t map_count;
|
||||
} uvm_gpu_root_chunk_indirect_peer_t;
|
||||
|
||||
typedef struct
|
||||
typedef struct uvm_pmm_gpu_struct
|
||||
{
|
||||
// Sizes of the MMU
|
||||
uvm_chunk_sizes_mask_t chunk_sizes[UVM_PMM_GPU_MEMORY_TYPE_COUNT];
|
||||
@ -348,9 +383,19 @@ typedef struct
|
||||
// List of root chunks used by VA blocks
|
||||
struct list_head va_block_used;
|
||||
|
||||
// List of chunks needing to be lazily freed and a queue for processing
|
||||
// the list. TODO: Bug 3881835: revisit whether to use nv_kthread_q_t
|
||||
// or workqueue.
|
||||
struct list_head va_block_lazy_free;
|
||||
nv_kthread_q_item_t va_block_lazy_free_q_item;
|
||||
|
||||
uvm_gpu_root_chunk_indirect_peer_t indirect_peer[UVM_ID_MAX_GPUS];
|
||||
} root_chunks;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
uvm_pmm_gpu_devmem_t devmem;
|
||||
#endif
|
||||
|
||||
// Lock protecting PMA allocation, freeing and eviction
|
||||
uvm_rw_semaphore_t pma_lock;
|
||||
|
||||
@ -410,16 +455,17 @@ struct page *uvm_gpu_chunk_to_page(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk);
|
||||
// Allocates num_chunks chunks of size chunk_size in caller-supplied array (chunks).
|
||||
//
|
||||
// Returned chunks are in the TEMP_PINNED state, requiring a call to either
|
||||
// uvm_pmm_gpu_unpin_temp or uvm_pmm_gpu_free. If a tracker is passed in, all
|
||||
// uvm_pmm_gpu_unpin_allocated, uvm_pmm_gpu_unpin_referenced, or
|
||||
// uvm_pmm_gpu_free. If a tracker is passed in, all
|
||||
// the pending operations on the allocated chunks will be added to it
|
||||
// guaranteeing that all the entries come from the same GPU as the PMM.
|
||||
// Otherwise, when tracker is NULL, all the pending operations will be
|
||||
// synchronized before returning to the caller.
|
||||
//
|
||||
// Each of the allocated chunks list nodes (uvm_gpu_chunk_t::list) can be used
|
||||
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_temp) or freed
|
||||
// (uvm_pmm_gpu_free). If used, the list node has to be returned to a valid
|
||||
// state before calling either of the APIs.
|
||||
// by the caller until the chunk is unpinned (uvm_pmm_gpu_unpin_allocated,
|
||||
// uvm_pmm_gpu_unpin_referenced) or freed (uvm_pmm_gpu_free). If used, the list
|
||||
// node has to be returned to a valid state before calling either of the APIs.
|
||||
//
|
||||
// In case of an error, the chunks array is guaranteed to be cleared.
|
||||
NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
|
||||
@ -459,10 +505,17 @@ static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
|
||||
return uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, UVM_PMM_GPU_MEMORY_TYPE_USER, flags, chunks, out_tracker);
|
||||
}
|
||||
|
||||
// Unpin a temporarily pinned chunk and set its reverse map to a VA block
|
||||
// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
|
||||
// mark it as allocated.
|
||||
//
|
||||
// Can only be used on user memory.
|
||||
void uvm_pmm_gpu_unpin_temp(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
||||
void uvm_pmm_gpu_unpin_allocated(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
||||
|
||||
// Unpin a temporarily pinned chunk, set its reverse map to a VA block, and
|
||||
// mark it as referenced.
|
||||
//
|
||||
// Can only be used on user memory.
|
||||
void uvm_pmm_gpu_unpin_referenced(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk, uvm_va_block_t *va_block);
|
||||
|
||||
// Frees the chunk. This also unpins the chunk if it is temporarily pinned.
|
||||
//
|
||||
|
File diff suppressed because it is too large
@ -28,6 +28,7 @@
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_pmm_gpu.h"
|
||||
|
||||
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
|
||||
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
|
||||
@ -176,17 +177,25 @@ size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_map
|
||||
uvm_reverse_map_t *out_mappings,
|
||||
size_t max_out_mappings);
|
||||
|
||||
#define UVM_CPU_CHUNK_SIZES PAGE_SIZE
|
||||
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
|
||||
|
||||
#if UVM_CPU_CHUNK_SIZES == PAGE_SIZE
|
||||
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 1
|
||||
typedef struct page uvm_cpu_chunk_t;
|
||||
typedef enum
|
||||
{
|
||||
UVM_CPU_CHUNK_ALLOC_FLAGS_NONE = 0,
|
||||
|
||||
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (page_index)
|
||||
// Zero the chunk.
|
||||
UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO = (1 << 0),
|
||||
|
||||
#else
|
||||
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 0
|
||||
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
// Account for the chunk in the cgroup context.
|
||||
UVM_CPU_CHUNK_ALLOC_FLAGS_ACCOUNT = (1 << 1),
|
||||
} uvm_cpu_chunk_alloc_flags_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_CPU_CHUNK_TYPE_PHYSICAL,
|
||||
UVM_CPU_CHUNK_TYPE_LOGICAL,
|
||||
UVM_CPU_CHUNK_TYPE_HMM
|
||||
} uvm_cpu_chunk_type_t;
|
||||
|
||||
// CPU memory chunk descriptor.
|
||||
// CPU memory chunks represent a physically contiguous CPU memory
|
||||
@ -197,6 +206,22 @@ typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
// splitting are referred to as "logical chunks".
|
||||
struct uvm_cpu_chunk_struct
|
||||
{
|
||||
uvm_cpu_chunk_type_t type:2;
|
||||
|
||||
// Size of the chunk.
|
||||
// For chunks resulting from page allocations (physical chunks),
|
||||
// this value is the size of the physical allocation.
|
||||
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
||||
|
||||
// Chunk reference count used when a CPU chunk is split. Each
|
||||
// child sub-chunk will increment the reference count of its
|
||||
// parent.
|
||||
// The reference count is set to 1 when the chunk is created.
|
||||
// This initial reference is dropped if the chunk is split in
|
||||
// order to automatically destroy the chunk when all logical
|
||||
// chunks resulting from the split are destroyed.
|
||||
nv_kref_t refcount;
|
||||
|
||||
// Pointer to the CPU page backing this CPU chunk.
|
||||
// For physical chunks, this will point to the head page. Physical
|
||||
// chunk allocation will set the reference count for the struct
|
||||
@ -209,100 +234,110 @@ struct uvm_cpu_chunk_struct
|
||||
// reference counted, there is no need to take separate references
|
||||
// to the struct page for logical chunks.
|
||||
struct page *page;
|
||||
};
|
||||
|
||||
// For logical chunks, this points to the parent chunk (which
|
||||
// could also be a logical chunk). For physical chunks, this
|
||||
// is NULL.
|
||||
uvm_cpu_chunk_t *parent;
|
||||
typedef struct
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
NvU32 map_count;
|
||||
} uvm_cpu_phys_mapping_t;
|
||||
|
||||
// Page offset of this chunk within the physical size of
|
||||
// the parent.
|
||||
uvm_page_index_t offset;
|
||||
typedef struct
|
||||
{
|
||||
uvm_cpu_chunk_t common;
|
||||
|
||||
// Region within the VA block covered by this CPU chunk.
|
||||
uvm_va_block_region_t region;
|
||||
// Lock protecting dirty_bitmap and gpu_mappings.
|
||||
uvm_mutex_t lock;
|
||||
|
||||
// Chunk reference count used when a CPU chunk is split. Each
|
||||
// child sub-chunk will increment the reference count of its
|
||||
// parent.
|
||||
nv_kref_t refcount;
|
||||
|
||||
// Size of the chunk.
|
||||
// For chunks resulting from page allocations (physical chunks),
|
||||
// this value is the size of the physical allocation.
|
||||
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
||||
|
||||
struct {
|
||||
struct
|
||||
{
|
||||
// Per-GPU array of DMA mapping addresses for the chunk.
|
||||
// The DMA mapping addresses for logical chunks are adjusted
|
||||
// to the correct offset within the parent chunk.
|
||||
union {
|
||||
NvU64 static_entry;
|
||||
NvU64 *dynamic_entries;
|
||||
union
|
||||
{
|
||||
uvm_cpu_phys_mapping_t static_entry;
|
||||
uvm_cpu_phys_mapping_t *dynamic_entries;
|
||||
};
|
||||
|
||||
// Maximum number of physical mapping entries available.
|
||||
// The initial value is 1 since the static_entry is always
|
||||
// available.
|
||||
// When using the dynamic_entries, it holds the size of the
|
||||
// dynamic_entries array. This may be more than the number
|
||||
// of GPUs with active mappings. The number of active entries
|
||||
// is the number of set bits in dma_addrs_mask.
|
||||
size_t max_entries;
|
||||
|
||||
// The set of GPU ID's that have an active physical mapping.
|
||||
// Since physical mappings are shared by all GPUs under a
|
||||
// parent GPU, this mask only needs to track uvm_parent_gpu_t.
|
||||
uvm_processor_mask_t dma_addrs_mask;
|
||||
} gpu_mappings;
|
||||
|
||||
// Lock protecting dirty_bitmap
|
||||
uvm_spinlock_t lock;
|
||||
|
||||
// A dynamically allocated bitmap (one per PAGE_SIZE page) used
|
||||
// to track dirty state of each PAGE_SIZE page.
|
||||
// Dirty state is tracked only by physical chunks. Therefore,
|
||||
// for logical chunks this will be NULL;
|
||||
// Large CPU chunks are allocated as compound pages. For such
|
||||
// pages, the kernel keeps dirtiness state with a single bit
|
||||
// (in the compound page head) that covers the entire compound
|
||||
// page.
|
||||
//
|
||||
// In the case of UVM-Lite GPUs, using the dirty bit of the
|
||||
// the compound page will cause performance regression due to
|
||||
// the copying of extra data. We mitigate this by using this
|
||||
// bitmap to track which base pages are dirty.
|
||||
unsigned long *dirty_bitmap;
|
||||
};
|
||||
|
||||
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (chunk->region.first)
|
||||
#endif // UVM_CPU_CHUNK_SIZES == PAGE_SIZE
|
||||
} uvm_cpu_physical_chunk_t;
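The gpu_mappings layout above is a small-array optimization: a single inline uvm_cpu_phys_mapping_t covers the common case of one mapping parent GPU, and the chunk only switches to a heap-allocated array when more entries are needed, with max_entries telling the two states apart. A hedged sketch of how such a lookup can be organized; the accessor below is illustrative, not the driver's code:

#include <stdio.h>
#include <stdlib.h>

typedef struct { unsigned long long dma_addr; unsigned map_count; } phys_mapping_t;

typedef struct {
    union {
        phys_mapping_t  static_entry;     // used while max_entries == 1
        phys_mapping_t *dynamic_entries;  // used once the table has grown
    };
    size_t max_entries;                    // 1 => inline slot, >1 => array size
} gpu_mappings_t;

// Return the mapping slot for index, or NULL if it is out of range.
static phys_mapping_t *mapping_for_index(gpu_mappings_t *m, size_t index)
{
    if (m->max_entries == 1)
        return index == 0 ? &m->static_entry : NULL;   // inline storage only holds slot 0

    return index < m->max_entries ? &m->dynamic_entries[index] : NULL;
}

int main(void)
{
    gpu_mappings_t m = { .max_entries = 1 };
    m.static_entry.dma_addr = 0x1000;

    printf("slot0 dma 0x%llx\n", mapping_for_index(&m, 0)->dma_addr);

    // Grow to 4 slots: copy the inline entry into the new array.
    phys_mapping_t *arr = calloc(4, sizeof(*arr));
    arr[0] = m.static_entry;
    m.dynamic_entries = arr;
    m.max_entries = 4;

    printf("slot0 dma 0x%llx after grow\n", mapping_for_index(&m, 0)->dma_addr);
    free(arr);
    return 0;
}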
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uvm_cpu_chunk_t common;
|
||||
|
||||
// Pointer to the parent chunk (which could also be a logical chunk).
|
||||
uvm_cpu_chunk_t *parent;
|
||||
uvm_processor_mask_t mapped_gpus;
|
||||
} uvm_cpu_logical_chunk_t;
|
||||
|
||||
// Return the set of allowed CPU chunk allocation sizes.
|
||||
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void);
|
||||
|
||||
// Allocate a physical CPU chunk for the specified page index and owned by
|
||||
// va_block.
|
||||
// Allocate a physical CPU chunk of the specified size.
|
||||
//
|
||||
// The size of the allocated CPU chunk may be any of the allowed sizes and
|
||||
// depends on several factors:
|
||||
// * Allocation will be attempted in reverse order - highest to lowest - in
|
||||
// order to ensure that the highest possible size is used.
|
||||
// * An allocation size will be used if:
|
||||
// - the VA region within the block covered by the allocation size is
|
||||
// aligned to that allocation size,
|
||||
// - the VA block region corresponding to the allocation size is empty
|
||||
// (has no previously populated pages), and
|
||||
// - the system allows a page allocation of that size.
|
||||
//
|
||||
// If mm is not NULL, the chunks memory will be added to the mm's memory cgroup.
|
||||
//
|
||||
// If a CPU chunk allocation succeeds, NV_OK is returned. If new_chunk is not
|
||||
// NULL it will be set to point to the newly allocated chunk. On failure,
|
||||
// NV_ERR_NO_MEMORY is returned.
|
||||
NV_STATUS uvm_cpu_chunk_alloc(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
struct mm_struct *mm,
|
||||
// If a CPU chunk allocation succeeds, NV_OK is returned. new_chunk will be set
|
||||
// to point to the newly allocated chunk. On failure, NV_ERR_NO_MEMORY is
|
||||
// returned.
|
||||
NV_STATUS uvm_cpu_chunk_alloc(uvm_chunk_size_t alloc_size,
|
||||
uvm_cpu_chunk_alloc_flags_t flags,
|
||||
uvm_cpu_chunk_t **new_chunk);
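Per the comment block above, the allocator tries the allowed chunk sizes from largest to smallest and settles on the first one whose region is aligned, unpopulated, and actually obtainable from the kernel. A sketch of that selection loop; the three predicates are hypothetical stand-ins for the real checks:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// Example allowed sizes (cf. UVM_CPU_CHUNK_SIZES): 4K, 64K and 2M.
#define ALLOWED_SIZES ((1u << 12) | (1u << 16) | (1u << 21))

// Hypothetical predicates standing in for the alignment, emptiness and page
// allocation checks the real allocator performs.
static bool region_aligned(uint64_t addr, uint32_t size) { return (addr & (size - 1)) == 0; }
static bool region_empty(uint64_t addr, uint32_t size)   { (void)addr; (void)size; return true; }
static bool try_page_alloc(uint32_t size)                { return size <= (1u << 16); } // pretend 2M fails

// Pick the chunk size to use for addr: the largest allowed size passing all checks.
static uint32_t pick_chunk_size(uint64_t addr)
{
    uint32_t sizes = ALLOWED_SIZES;

    while (sizes) {
        uint32_t size = 1u << (31 - __builtin_clz(sizes));  // current largest candidate

        if (region_aligned(addr, size) && region_empty(addr, size) && try_page_alloc(size))
            return size;

        sizes &= ~size;   // drop it and try the next smaller size
    }
    return 0;
}

int main(void)
{
    printf("0x%x\n", pick_chunk_size(0x200000));  // 2M-aligned, but 2M "fails": 0x10000
    printf("0x%x\n", pick_chunk_size(0x201000));  // only 4K-aligned: 0x1000
    return 0;
}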
|
||||
|
||||
// Insert a CPU chunk in the va_block's storage structures.
|
||||
// Allocate a HMM CPU chunk.
|
||||
//
|
||||
// On success, NV_OK is returned. On error,
|
||||
// - NV_ERR_NO_MEMORY is returned if memory allocation for any if the internal
|
||||
// structures did not succeed.
|
||||
// - NV_ERR_INVALID_ARGUMENT is returned if the size of the chunk to be inserted
|
||||
// is invalid.
|
||||
// - NV_ERR_INVALID_STATE is returned if a matching chunk already exists in the
|
||||
// block.
|
||||
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
// HMM chunks differ from normal CPU chunks in that the kernel has already
|
||||
// allocated the page for them. This means we don't allocate any CPU memory
|
||||
// here. It also means the kernel holds the reference to the page, so we
|
||||
// shouldn't call put_page() when freeing the chunk.
|
||||
//
|
||||
// If a CPU chunk allocation succeeds NV_OK is returned and new_chunk will be
|
||||
// set to point to the newly allocated chunk. On failure, NV_ERR_NO_MEMORY is
|
||||
// returned.
|
||||
//
|
||||
// Note that the kernel retains logical ownership of the page. This means page
|
||||
// properties should not be directly modified by UVM. In particular page flags
|
||||
// such as PageDirty should not be modified by UVM, nor can UVM directly free
|
||||
// the page. The kernel is also responsible for mapping/unmapping the page on
|
||||
// the CPU. We create a CPU chunk for the page primarily to allow GPU mappings
|
||||
// for the page to be created.
|
||||
NV_STATUS uvm_cpu_chunk_alloc_hmm(struct page *page,
|
||||
uvm_cpu_chunk_t **new_chunk);
|
||||
|
||||
// Remove a CPU chunk from the va_block's storage structures.
|
||||
// The chunk is not freed, only removed from the block's storage structures.
|
||||
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
// Convert a physical chunk to an HMM chunk.
|
||||
static void uvm_cpu_chunk_make_hmm(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
UVM_ASSERT(chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL);
|
||||
|
||||
// Return the CPU chunk backing page_index within the VA block.
|
||||
// If page_index is beyond the boundary of the VA block or a CPU chunk for
|
||||
// the specified page has not been allocated and/or inserted into the block,
|
||||
// NULL is returned.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *block, uvm_page_index_t page_index);
|
||||
chunk->type = UVM_CPU_CHUNK_TYPE_HMM;
|
||||
}
|
||||
|
||||
uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk);
|
||||
|
||||
@ -313,158 +348,105 @@ static size_t uvm_cpu_chunk_num_pages(uvm_cpu_chunk_t *chunk)
|
||||
return uvm_cpu_chunk_get_size(chunk) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline bool uvm_cpu_chunk_is_hmm(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
return chunk->type == UVM_CPU_CHUNK_TYPE_HMM;
|
||||
}
|
||||
|
||||
static bool uvm_cpu_chunk_is_physical(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
return true;
|
||||
#else
|
||||
return chunk->parent == NULL;
|
||||
#endif
|
||||
return (chunk->type == UVM_CPU_CHUNK_TYPE_PHYSICAL || uvm_cpu_chunk_is_hmm(chunk));
|
||||
}
|
||||
|
||||
// Return a pointer to the struct page backing page_index within the owning
|
||||
// VA block.
|
||||
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
static bool uvm_cpu_chunk_is_logical(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
return chunk->type == UVM_CPU_CHUNK_TYPE_LOGICAL;
|
||||
}
|
||||
|
||||
// Take a reference to the CPU chunk.
|
||||
void uvm_cpu_chunk_get(uvm_cpu_chunk_t *chunk);
|
||||
static uvm_cpu_physical_chunk_t *uvm_cpu_chunk_to_physical(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
UVM_ASSERT(uvm_cpu_chunk_is_physical(chunk));
|
||||
return container_of((chunk), uvm_cpu_physical_chunk_t, common);
|
||||
}
|
||||
|
||||
// Release a reference to the CPU chunk. When the reference count
|
||||
// drops to zero, the CPU chunk will be freed. Physical CPU chunks
|
||||
// will also free the CPU pages backing the chunk.
|
||||
void uvm_cpu_chunk_put(uvm_cpu_chunk_t *chunk);
|
||||
static uvm_cpu_logical_chunk_t *uvm_cpu_chunk_to_logical(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
UVM_ASSERT(uvm_cpu_chunk_is_logical(chunk));
|
||||
return container_of((chunk), uvm_cpu_logical_chunk_t, common);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_cpu_chunk_gpu_mapping_alloc(uvm_va_block_t *va_block, uvm_gpu_id_t id);
|
||||
void uvm_cpu_chunk_gpu_mapping_split(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_gpu_id_t id);
|
||||
void uvm_cpu_chunk_gpu_mapping_free(uvm_va_block_t *va_block, uvm_gpu_id_t id);
|
||||
// Free a CPU chunk.
|
||||
// This may not result in the immediate freeing of the physical pages of the
|
||||
// chunk if this is a logical chunk and there are other logical chunks holding
|
||||
// references to the physical chunk.
|
||||
// If any DMA mappings to this chunk are still active, they are implicitly
|
||||
// destroyed.
|
||||
void uvm_cpu_chunk_free(uvm_cpu_chunk_t *chunk);
|
||||
|
||||
// Set the CPU chunk's DMA mapping address for the specified GPU ID.
|
||||
NV_STATUS uvm_cpu_chunk_set_gpu_mapping_addr(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_gpu_id_t id,
|
||||
NvU64 dma_addr);
|
||||
// In some configurations such as SR-IOV heavy, a CPU chunk cannot be
|
||||
// referenced using its physical address. There needs to be a kernel virtual
|
||||
// mapping created.
|
||||
//
|
||||
// This helper function creates a DMA mapping on the GPU (see
|
||||
// uvm_cpu_chunk_map_gpu_phys()) and if necessary a kernel virtual mapping for the
|
||||
// chunk. The virtual mapping persists until GPU deinitialization, such that no
|
||||
// unmap functionality is exposed. For more details see uvm_mmu_sysmem_map().
|
||||
//
|
||||
// Note that unlike uvm_cpu_chunk_map_gpu_phys(), this helper requires the GPU
|
||||
// object instead of the parent GPU object.
|
||||
NV_STATUS uvm_cpu_chunk_map_gpu(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu);
|
||||
|
||||
// Destroy a CPU chunk's DMA mapping for the parent GPU.
|
||||
// If chunk is a logical chunk, this call may not necessarily destroy the DMA
|
||||
// mapping of the parent physical chunk since all logical chunks share the
|
||||
// parent's DMA mapping.
|
||||
void uvm_cpu_chunk_unmap_gpu_phys(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Get the CPU chunk's DMA mapping address for the specified GPU ID.
|
||||
NvU64 uvm_cpu_chunk_get_gpu_mapping_addr(uvm_va_block_t *block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_gpu_id_t id);
|
||||
// If there is no mapping for the GPU, 0 is returned.
|
||||
NvU64 uvm_cpu_chunk_get_gpu_phys_addr(uvm_cpu_chunk_t *chunk, uvm_parent_gpu_t *parent_gpu);
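
// Illustrative caller-side sketch (not part of the driver sources): a typical
// map/query/unmap round trip using the helpers above. The helper name
// example_dma_round_trip() is hypothetical.
static NV_STATUS example_dma_round_trip(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
    NvU64 dma_addr;
    NV_STATUS status;

    // Create (or reuse) the DMA mapping of the chunk for this GPU.
    status = uvm_cpu_chunk_map_gpu(chunk, gpu);
    if (status != NV_OK)
        return status;

    // Once mapped, the chunk reports a non-zero DMA address for the parent GPU.
    dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent);
    UVM_ASSERT(dma_addr != 0);

    // Drop the physical mapping once it is no longer needed.
    uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu->parent);

    return NV_OK;
}
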
|
||||
#if !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
// Split a CPU chunk into a set of CPU chunks of size new_size.
|
||||
// new_size has to be one of the supported CPU chunk allocation sizes and has to
|
||||
// be smaller than the current size of chunk.
|
||||
// Split a CPU chunk into a set of CPU chunks of the next size down from the set
|
||||
// of enabled CPU chunk sizes.
|
||||
//
|
||||
// On success, NV_OK is returned. On failure NV_ERR_NO_MEMORY will be returned.
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t **new_chunks);
|
||||
// This function expects that the chunk to be split is larger than the minimum
|
||||
// enabled chunk size and that new_chunks has enough space for all chunks
|
||||
// resulting from the split.
|
||||
//
|
||||
// On success, NV_OK is returned and the caller-provided new_chunks array will
|
||||
// be filled out with the newly-created logical chunks.
|
||||
//
|
||||
// After a successful split, the input chunk can no longer be used.
|
||||
//
|
||||
// On failure NV_ERR_NO_MEMORY will be returned.
|
||||
//
|
||||
// Should never be called for HMM chunks as these don't need splitting (they can
|
||||
// only be PAGE_SIZE) and even if larger chunks could exist UVM could not split
|
||||
// them without kernel interaction which currently isn't exported. Will return
|
||||
// NV_ERR_INVALID_ARGUMENT for an HMM chunk.
|
||||
// TODO: Bug 3368756: add support for transparent huge page (THP)
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_cpu_chunk_t *chunk, uvm_cpu_chunk_t **new_chunks);
|
||||
|
||||
// Merge chunks to merge_size.
|
||||
//
|
||||
// All input chunks must have the same parent and size. If not,
|
||||
// NV_ERR_INVALID_ARGUMENT is returned.
|
||||
//
|
||||
// If a merge cannot be done, NV_WARN_NOTHING_TO_DO is returned.
|
||||
//
|
||||
// On success, NV_OK is returned and merged_chunk is set to point to the
|
||||
// merged chunk.
|
||||
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t **chunks,
|
||||
size_t num_merge_chunks,
|
||||
uvm_chunk_size_t merge_size,
|
||||
uvm_cpu_chunk_t **merged_chunk);
|
||||
// Merge an array of logical chunks into their parent chunk. All chunks have to
|
||||
// have the same size, parent, and set of mapped GPUs.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_cpu_chunk_t **chunks);
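
// Illustrative caller-side sketch (not part of the driver sources): split a
// physical chunk into the next-smaller enabled size and merge the pieces back.
// The helper name example_split_then_merge() is hypothetical, and the chunk
// size utilities and uvm_kvmalloc helpers are assumed to be available here.
static NV_STATUS example_split_then_merge(uvm_cpu_chunk_t *chunk)
{
    uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
    uvm_chunk_size_t split_size = uvm_chunk_find_prev_size(uvm_cpu_chunk_get_allocation_sizes(), size);
    size_t num_split_chunks = size / split_size;
    uvm_cpu_chunk_t **split_chunks;
    uvm_cpu_chunk_t *merged_chunk;
    NV_STATUS status;

    split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
    if (!split_chunks)
        return NV_ERR_NO_MEMORY;

    status = uvm_cpu_chunk_split(chunk, split_chunks);
    if (status == NV_OK) {
        // The logical chunks share the parent's backing pages; merging them
        // returns the original physical chunk.
        merged_chunk = uvm_cpu_chunk_merge(split_chunks);
        UVM_ASSERT(merged_chunk == chunk);
    }

    uvm_kvfree(split_chunks);

    return status;
}
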
|
||||
// Mark the CPU sub-page page_index in the CPU chunk as dirty.
|
||||
// page_index has to be a page within the chunk's region.
|
||||
// Mark the page_index sub-page of the chunk as dirty.
|
||||
// page_index is an offset into the chunk.
|
||||
//
|
||||
// Note that dirty status for HMM chunks should not be modified directly from
|
||||
// UVM. Instead the kernel will mark the backing struct pages dirty either on
|
||||
// fault when written to from the CPU, or when the PTE is mirrored to the GPU
|
||||
// using hmm_range_fault().
|
||||
void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
|
||||
// Mark the CPU sub-pages page_index in the CPU chunk as clean.
|
||||
// page_index has to be a page within the chunk's region.
|
||||
// Mark the page_index sub-page of the chunk as clean.
|
||||
// page_index is an offset into the chunk.
|
||||
void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
|
||||
// Return true if the CPU sub-pages page_index in the CPU chunk are dirty.
|
||||
// page_index has to be a page within the chunk's region.
|
||||
// Return true if the page_index base page of the CPU chunk is dirty.
|
||||
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
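
// Illustrative caller-side sketch (not part of the driver sources): count the
// dirty base pages of a chunk using the accessors above. The helper name
// example_count_dirty_pages() is hypothetical.
static size_t example_count_dirty_pages(uvm_cpu_chunk_t *chunk)
{
    size_t num_pages = uvm_cpu_chunk_num_pages(chunk);
    size_t num_dirty = 0;
    uvm_page_index_t page_index;

    // page_index is an offset into the chunk, starting at 0.
    for (page_index = 0; page_index < num_pages; page_index++) {
        if (uvm_cpu_chunk_is_dirty(chunk, page_index))
            num_dirty++;
    }

    return num_dirty;
}
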
|
||||
#else // UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
|
||||
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t **new_chunks)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t **chunk,
|
||||
size_t num_merge_chunks,
|
||||
uvm_chunk_size_t merge_size,
|
||||
uvm_cpu_chunk_t **merged_chunk)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
{
|
||||
SetPageDirty(chunk);
|
||||
}
|
||||
|
||||
static void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
{
|
||||
ClearPageDirty(chunk);
|
||||
}
|
||||
|
||||
static bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
{
|
||||
return PageDirty(chunk);
|
||||
}
|
||||
#endif // !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
|
||||
// Return the first CPU chunk in the block. If no CPU chunks have been
|
||||
// allocated and/or inserted into the block, NULL is returned.
|
||||
// If not NULL, page_index will be set to the first page of the block covered by
|
||||
// the returned chunk.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_first_in_block(uvm_va_block_t *va_block, uvm_page_index_t *out_page_index);
|
||||
|
||||
// Return the next CPU chunk in the block owning chunk.
|
||||
// previous_page_index is the index after which to start searching. Its value
|
||||
// will be updated with the starting page index of the next chunk in the block.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_next(uvm_va_block_t *va_block, uvm_page_index_t *previous_page_index);
|
||||
|
||||
#define for_each_cpu_chunk_in_block(chunk, page_index, va_block) \
|
||||
for ((chunk) = uvm_cpu_chunk_first_in_block((va_block), &(page_index)); \
|
||||
(chunk) != NULL; \
|
||||
(page_index) += uvm_cpu_chunk_num_pages(chunk) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)))
|
||||
|
||||
#define for_each_cpu_chunk_in_block_safe(chunk, page_index, next_page_index, va_block) \
|
||||
for ((chunk) = uvm_cpu_chunk_first_in_block((va_block), &(page_index)), \
|
||||
(next_page_index) = (page_index) + ((chunk) ? uvm_cpu_chunk_num_pages(chunk) : 0); \
|
||||
(chunk) != NULL; \
|
||||
(page_index) = (next_page_index) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)), \
|
||||
(next_page_index) = (page_index) + ((chunk) ? uvm_cpu_chunk_num_pages(chunk) : 0))
|
||||
|
||||
// Use a special symbol for the region so it does not replace the chunk's region
|
||||
// structure member.
|
||||
#define for_each_cpu_chunk_in_block_region(chunk, page_index, va_block, __region) \
|
||||
for ((page_index) = uvm_va_block_first_page_in_mask((__region), &(va_block)->cpu.allocated), \
|
||||
(chunk) = uvm_cpu_chunk_get_chunk_for_page((va_block), (page_index)); \
|
||||
(chunk) != NULL && page_index < (__region).outer; \
|
||||
(page_index) += uvm_cpu_chunk_num_pages(chunk) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)))
|
||||
|
||||
#define for_each_cpu_chunk_in_block_region_safe(chunk, page_index, next_page_index, va_block, __region) \
|
||||
for ((page_index) = uvm_va_block_first_page_in_mask((__region), &(va_block)->cpu.allocated), \
|
||||
(chunk) = uvm_cpu_chunk_get_chunk_for_page((va_block), (page_index)), \
|
||||
(next_page_index) = (page_index) + (chunk ? uvm_cpu_chunk_num_pages(chunk) : 0); \
|
||||
(chunk) != NULL && page_index < (__region).outer; \
|
||||
(page_index) = (next_page_index) - 1, (chunk) = uvm_cpu_chunk_next((va_block), &(page_index)), \
|
||||
(next_page_index) = (page_index) + (chunk ? uvm_cpu_chunk_num_pages(chunk) : 0))
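
// Illustrative caller-side sketch (not part of the driver sources): walk all
// CPU chunks of a block with the iterator above and add up the bytes they
// cover. The helper name example_block_cpu_bytes() is hypothetical, and the
// caller is assumed to hold the block lock.
static NvU64 example_block_cpu_bytes(uvm_va_block_t *va_block)
{
    uvm_cpu_chunk_t *chunk;
    uvm_page_index_t page_index;
    NvU64 total = 0;

    for_each_cpu_chunk_in_block(chunk, page_index, va_block)
        total += uvm_cpu_chunk_get_size(chunk);

    return total;
}
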
|
||||
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
|
||||
struct file *filp)
|
||||
{
|
||||
|
@ -30,6 +30,10 @@
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_va_space.h"
|
||||
#include "uvm_kvmalloc.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_processors.h"
|
||||
|
||||
// Pre-allocated array used for dma-to-virt translations
|
||||
static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
|
||||
@ -576,3 +580,640 @@ NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *f
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
|
||||
{
|
||||
struct page **pages;
|
||||
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
size_t num_pages = uvm_cpu_chunk_num_pages(chunk);
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
UVM_ASSERT(cpu_addr);
|
||||
|
||||
// Map the CPU chunk on the CPU.
|
||||
if (chunk_size > PAGE_SIZE) {
|
||||
size_t i;
|
||||
|
||||
pages = uvm_kvmalloc(num_pages * sizeof(*pages));
|
||||
if (!pages)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for (i = 0; i < num_pages; i++)
|
||||
pages[i] = chunk->page + i;
|
||||
}
|
||||
else {
|
||||
pages = &chunk->page;
|
||||
}
|
||||
|
||||
*cpu_addr = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
|
||||
if (!*cpu_addr)
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
|
||||
if (chunk_size > PAGE_SIZE)
|
||||
uvm_kvfree(pages);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_mapping_access(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
uvm_gpu_phys_address_t gpu_phys_addr;
|
||||
uvm_gpu_address_t gpu_addr;
|
||||
uvm_push_t push;
|
||||
NvU32 *cpu_addr;
|
||||
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
size_t i;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
TEST_NV_CHECK_RET(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr));
|
||||
memset(cpu_addr, 0, chunk_size);
|
||||
|
||||
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent);
|
||||
gpu_phys_addr = uvm_gpu_phys_address(UVM_APERTURE_SYS, dma_addr);
|
||||
|
||||
if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu))
|
||||
gpu_addr = uvm_gpu_address_virtual_from_sysmem_phys(gpu, gpu_phys_addr.address);
|
||||
else
|
||||
gpu_addr = uvm_gpu_address_from_phys(gpu_phys_addr);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_GPU_TO_CPU,
|
||||
NULL,
|
||||
&push,
|
||||
"GPU -> CPU {%s, %llx} %u bytes",
|
||||
uvm_gpu_address_aperture_string(gpu_addr),
|
||||
gpu_addr.address,
|
||||
chunk_size),
|
||||
done);
|
||||
gpu->parent->ce_hal->memset_4(&push, gpu_addr, 0xdeadc0de, chunk_size);
|
||||
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), done);
|
||||
|
||||
for (i = 0; i < chunk_size / sizeof(*cpu_addr); i++) {
|
||||
if (cpu_addr[i] != 0xdeadc0de) {
|
||||
UVM_TEST_PRINT("GPU write of {%s, 0x%llx} %u bytes expected pattern 0x%08x, but offset %zu is 0x%08x\n",
|
||||
uvm_gpu_address_aperture_string(gpu_addr),
|
||||
gpu_addr.address,
|
||||
chunk_size,
|
||||
0xdeadc0de,
|
||||
i * sizeof(*cpu_addr),
|
||||
cpu_addr[i]);
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
vunmap(cpu_addr);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_alloc(uvm_chunk_size_t size,
|
||||
uvm_cpu_chunk_alloc_flags_t flags,
|
||||
uvm_cpu_chunk_t **out_chunk)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t i;
|
||||
|
||||
UVM_ASSERT(out_chunk);
|
||||
|
||||
// It is possible that the allocation fails due to lack of large pages
|
||||
// rather than an API issue, which will result in a false negative.
|
||||
// However, that should be very rare.
|
||||
TEST_NV_CHECK_RET(uvm_cpu_chunk_alloc(size, flags, &chunk));
|
||||
|
||||
// Check general state of the chunk:
|
||||
// - chunk should be a physical chunk,
|
||||
// - chunk should have the correct size,
|
||||
// - chunk should have the correct number of base pages, and
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_physical(chunk), done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(chunk) == size, done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_num_pages(chunk) == size / PAGE_SIZE, done);
|
||||
|
||||
if (flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO) {
|
||||
NvU64 *cpu_addr;
|
||||
|
||||
TEST_NV_CHECK_GOTO(cpu_chunk_map_on_cpu(chunk, (void **)&cpu_addr), done);
|
||||
for (i = 0; i < size / sizeof(*cpu_addr); i++)
|
||||
TEST_CHECK_GOTO(cpu_addr[i] == 0, done);
|
||||
vunmap(cpu_addr);
|
||||
}
|
||||
|
||||
for (i = 0; i < size / PAGE_SIZE; i++) {
|
||||
if (flags & UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO)
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
else
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
}
|
||||
|
||||
done:
|
||||
if (status == NV_OK)
|
||||
*out_chunk = chunk;
|
||||
else
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_mapping_basic_verify(uvm_gpu_t *gpu,
|
||||
uvm_cpu_chunk_alloc_flags_t flags,
|
||||
uvm_chunk_size_t size)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
NvU64 dma_addr;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, flags, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
|
||||
// Check state of the physical chunk:
|
||||
// - gpu_mappings.max_entries should be 1 (for the static entry),
|
||||
// - gpu_mappings.dma_addrs_mask should be 0.
|
||||
// - no GPU mapping address.
|
||||
TEST_CHECK_GOTO(phys_chunk->gpu_mappings.max_entries == 1, done);
|
||||
TEST_CHECK_GOTO(uvm_processor_mask_get_gpu_count(&phys_chunk->gpu_mappings.dma_addrs_mask) == 0, done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent) == 0, done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
|
||||
|
||||
// Test basic access.
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
|
||||
|
||||
// Test double map is harmless.
|
||||
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent) == dma_addr, done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
|
||||
|
||||
// Test unmap, remap.
|
||||
uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu->parent);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent) == 0, done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done);
|
||||
|
||||
done:
|
||||
// Test free with mapped GPUs still works.
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_mapping_basic(uvm_gpu_t *gpu, uvm_cpu_chunk_alloc_flags_t flags)
|
||||
{
|
||||
uvm_chunk_sizes_mask_t chunk_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_size_t size;
|
||||
|
||||
for_each_chunk_size(size, chunk_sizes)
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_mapping_basic_verify(gpu, flags, size));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_mapping_array(uvm_gpu_t *gpu1, uvm_gpu_t *gpu2, uvm_gpu_t *gpu3)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
NvU64 dma_addr_gpu2;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(PAGE_SIZE, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu3), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu3), done);
|
||||
dma_addr_gpu2 = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu2->parent);
|
||||
uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu3->parent);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu1), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu2), done);
|
||||
|
||||
// DMA mapping addresses for different GPUs live in different IOMMU spaces,
|
||||
// so it would be perfectly legal for them to have the same IOVA, and even
|
||||
// if they lived in the same space we freed GPU3's address so it would be
|
||||
// available for reuse.
|
||||
// What we need to ensure is that GPU2's address didn't change after we map
|
||||
// GPU1. It's true that we may get a false negative if both addresses
|
||||
// happened to alias and we had a bug in how the addresses are shifted in
|
||||
// the dense array, but that's better than intermittent failure.
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu2->parent) == dma_addr_gpu2, done);
|
||||
|
||||
done:
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS do_test_cpu_chunk_split_and_merge(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
size_t num_split_chunks;
|
||||
uvm_cpu_chunk_t **split_chunks;
|
||||
uvm_cpu_chunk_t *merged_chunk;
|
||||
uvm_chunk_size_t split_size;
|
||||
NvU64 phys_dma_addr;
|
||||
size_t map_chunk;
|
||||
size_t i;
|
||||
|
||||
split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
|
||||
UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
|
||||
num_split_chunks = size / split_size;
|
||||
split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
|
||||
|
||||
if (!split_chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(chunk, gpu), done_free);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(chunk, gpu), done_free);
|
||||
uvm_cpu_chunk_unmap_gpu_phys(chunk, gpu->parent);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
|
||||
TEST_CHECK_GOTO(nv_kref_read(&chunk->refcount) == num_split_chunks, done);
|
||||
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
TEST_CHECK_GOTO(split_chunks[i], done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[i]), done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[i]) == split_size, done);
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
|
||||
}
|
||||
|
||||
// Test CPU chunk merging.
|
||||
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
|
||||
TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
|
||||
|
||||
// Since all logical chunks were mapped, the entire merged chunk should
|
||||
// be accessible without needing to map it.
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);
|
||||
|
||||
// Test that GPU mappings are transferred after a split
|
||||
phys_dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu->parent);
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
|
||||
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
NvU64 dma_addr;
|
||||
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[i], gpu), done);
|
||||
dma_addr = uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu->parent);
|
||||
TEST_CHECK_GOTO(dma_addr == phys_dma_addr + (i * split_size), done);
|
||||
uvm_cpu_chunk_unmap_gpu_phys(split_chunks[i], gpu->parent);
|
||||
}
|
||||
|
||||
// Test that mapping one logical chunk does not affect others.
|
||||
map_chunk = num_split_chunks / 2;
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[map_chunk], gpu), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(split_chunks[map_chunk], gpu), done);
|
||||
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
if (i != map_chunk)
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[i], gpu->parent) == 0, done);
|
||||
}
|
||||
|
||||
if (split_size > PAGE_SIZE) {
|
||||
for (i = 0; i < num_split_chunks; i++)
|
||||
TEST_NV_CHECK_GOTO(do_test_cpu_chunk_split_and_merge(split_chunks[i], gpu), done);
|
||||
}
|
||||
|
||||
// Map all chunks before merging.
|
||||
for (i = 0; i < num_split_chunks; i++)
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
|
||||
|
||||
// Test CPU chunk merging.
|
||||
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
|
||||
|
||||
// At this point, all split chunks have been merged.
|
||||
num_split_chunks = 0;
|
||||
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(merged_chunk) == size, done_free);
|
||||
TEST_CHECK_GOTO(merged_chunk == chunk, done_free);
|
||||
|
||||
// Since all logical chunks were mapped, the entire merged chunk should
|
||||
// be accessible without needing to map it.
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_access(merged_chunk, gpu), done_free);
|
||||
|
||||
done:
|
||||
for (i = 0; i < num_split_chunks; i++)
|
||||
uvm_cpu_chunk_free(split_chunks[i]);
|
||||
|
||||
done_free:
|
||||
uvm_kvfree(split_chunks);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_split_and_merge(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_size_t size;
|
||||
|
||||
size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
|
||||
for_each_chunk_size_from(size, alloc_sizes) {
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
NV_STATUS status;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
status = do_test_cpu_chunk_split_and_merge(chunk, gpu);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_dirty_split(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_chunk_size_t size = uvm_cpu_chunk_get_size(chunk);
|
||||
uvm_chunk_size_t split_size;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_cpu_chunk_t **split_chunks;
|
||||
uvm_cpu_chunk_t *merged_chunk;
|
||||
size_t num_pages = size / PAGE_SIZE;
|
||||
size_t num_split_chunks;
|
||||
size_t num_split_chunk_pages;
|
||||
size_t i;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
|
||||
UVM_ASSERT(split_size != UVM_CHUNK_SIZE_INVALID);
|
||||
num_split_chunks = size / split_size;
|
||||
num_split_chunk_pages = split_size / PAGE_SIZE;
|
||||
split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
|
||||
if (!split_chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
|
||||
|
||||
// The parent chunk had only the even pages set as dirty. Make sure
|
||||
// that's still the case after the split.
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
uvm_page_index_t chunk_page;
|
||||
|
||||
for (chunk_page = 0; chunk_page < num_split_chunk_pages; chunk_page++) {
|
||||
if (((i * num_split_chunk_pages) + chunk_page) % 2)
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(split_chunks[i], chunk_page), done);
|
||||
else
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(split_chunks[i], chunk_page), done);
|
||||
}
|
||||
}
|
||||
|
||||
if (split_size > PAGE_SIZE) {
|
||||
for (i = 0; i < num_split_chunks; i++)
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty_split(split_chunks[i]), done);
|
||||
}
|
||||
|
||||
merged_chunk = uvm_cpu_chunk_merge(split_chunks);
|
||||
num_split_chunks = 0;
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
if (i % 2)
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(merged_chunk, i), done_free);
|
||||
else
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(merged_chunk, i), done_free);
|
||||
}
|
||||
|
||||
done:
|
||||
for (i = 0; i < num_split_chunks; i++)
|
||||
uvm_cpu_chunk_free(split_chunks[i]);
|
||||
|
||||
done_free:
|
||||
uvm_kvfree(split_chunks);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_cpu_chunk_dirty(uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_chunk_size_t size;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
size_t i;
|
||||
|
||||
for_each_chunk_size(size, alloc_sizes) {
|
||||
uvm_cpu_physical_chunk_t *phys_chunk;
|
||||
size_t num_pages;
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
num_pages = uvm_cpu_chunk_num_pages(chunk);
|
||||
|
||||
for (i = 0; i < num_pages; i++)
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
|
||||
if (size > PAGE_SIZE)
|
||||
TEST_CHECK_GOTO(bitmap_empty(phys_chunk->dirty_bitmap, num_pages), done);
|
||||
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO, &chunk));
|
||||
phys_chunk = uvm_cpu_chunk_to_physical(chunk);
|
||||
num_pages = uvm_cpu_chunk_num_pages(chunk);
|
||||
|
||||
// Allocating the chunk with UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO will set the
|
||||
// entire chunk as dirty.
|
||||
for (i = 0; i < num_pages; i++)
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
|
||||
if (size > PAGE_SIZE)
|
||||
TEST_CHECK_GOTO(bitmap_full(phys_chunk->dirty_bitmap, num_pages), done);
|
||||
|
||||
// For chunks larger than PAGE_SIZE, marking individual pages in a
|
||||
// physical chunk should not affect the entire chunk.
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
uvm_cpu_chunk_mark_clean(chunk, i);
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
if (size > PAGE_SIZE) {
|
||||
TEST_CHECK_GOTO(bitmap_empty(phys_chunk->dirty_bitmap, i + 1), done);
|
||||
TEST_CHECK_GOTO(bitmap_weight(phys_chunk->dirty_bitmap, num_pages) == num_pages - (i + 1), done);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
uvm_cpu_chunk_mark_dirty(chunk, i);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
if (size > PAGE_SIZE) {
|
||||
TEST_CHECK_GOTO(bitmap_full(phys_chunk->dirty_bitmap, i + 1), done);
|
||||
TEST_CHECK_GOTO(bitmap_weight(phys_chunk->dirty_bitmap, num_pages) == i + 1, done);
|
||||
}
|
||||
}
|
||||
|
||||
// Leave only even pages as dirty
|
||||
for (i = 1; i < num_pages; i += 2)
|
||||
uvm_cpu_chunk_mark_clean(chunk, i);
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
if (i % 2) {
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
if (size > PAGE_SIZE)
|
||||
TEST_CHECK_GOTO(!test_bit(i, phys_chunk->dirty_bitmap), done);
|
||||
}
|
||||
else {
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_dirty(chunk, i), done);
|
||||
if (size > PAGE_SIZE)
|
||||
TEST_CHECK_GOTO(test_bit(i, phys_chunk->dirty_bitmap), done);
|
||||
}
|
||||
}
|
||||
|
||||
if (size > PAGE_SIZE)
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty_split(chunk), done);
|
||||
|
||||
done:
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS do_test_cpu_chunk_free(uvm_cpu_chunk_t *chunk, uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_cpu_chunk_t **split_chunks;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
size_t size = uvm_cpu_chunk_get_size(chunk);
|
||||
uvm_chunk_size_t split_size = uvm_chunk_find_prev_size(alloc_sizes, size);
|
||||
size_t num_split_chunks = size / split_size;
|
||||
uvm_gpu_t *gpu;
|
||||
size_t i;
|
||||
size_t j;
|
||||
|
||||
split_chunks = uvm_kvmalloc_zero(num_split_chunks * sizeof(*split_chunks));
|
||||
if (!split_chunks) {
|
||||
UVM_TEST_PRINT("Failed to allocate split chunk array memory");
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto done_free;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_split(chunk, split_chunks), done_free);
|
||||
|
||||
// The caller does not free the input chunk.
|
||||
// So, we have to do it in this function. However, beyond this point
|
||||
// the input chunk will be freed by freeing the split chunks.
|
||||
chunk = NULL;
|
||||
|
||||
// Map every other chunk.
|
||||
// The call to uvm_cpu_chunk_unmap_gpu_phys() is here in case this is part
|
||||
// of a double split (see below). In that case, the parent chunk would be
|
||||
// either mapped or unmapped.
|
||||
//
|
||||
// If it is mapped, we have to unmap the subchunks in
|
||||
// order for the mapping check below to succeed. If it is unmapped, the
|
||||
// calls are noops.
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
|
||||
if (i & (1 << uvm_id_gpu_index(gpu->id)))
|
||||
TEST_NV_CHECK_GOTO(uvm_cpu_chunk_map_gpu(split_chunks[i], gpu), done);
|
||||
else
|
||||
uvm_cpu_chunk_unmap_gpu_phys(split_chunks[i], gpu->parent);
|
||||
}
|
||||
}
|
||||
|
||||
// Do a double split if we can
|
||||
if (split_size > PAGE_SIZE) {
|
||||
size_t chunk_to_be_resplit;
|
||||
|
||||
// Test an even (mapped) chunk.
|
||||
chunk_to_be_resplit = num_split_chunks / 2;
|
||||
TEST_NV_CHECK_GOTO(do_test_cpu_chunk_free(split_chunks[chunk_to_be_resplit], va_space, test_gpus), done);
|
||||
|
||||
// The chunk would have been freed by do_test_cpu_chunk_free().
|
||||
split_chunks[chunk_to_be_resplit] = NULL;
|
||||
|
||||
// Test an odd (unmapped) chunk.
|
||||
chunk_to_be_resplit += 1;
|
||||
TEST_NV_CHECK_GOTO(do_test_cpu_chunk_free(split_chunks[chunk_to_be_resplit], va_space, test_gpus), done);
|
||||
split_chunks[chunk_to_be_resplit] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
if (!split_chunks[i])
|
||||
continue;
|
||||
|
||||
uvm_cpu_chunk_free(split_chunks[i]);
|
||||
split_chunks[i] = NULL;
|
||||
|
||||
for (j = i + 1; j < num_split_chunks; j++) {
|
||||
if (!split_chunks[j])
|
||||
continue;
|
||||
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_is_logical(split_chunks[j]), done);
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_size(split_chunks[j]) == split_size, done);
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
|
||||
if (j & (1 << uvm_id_gpu_index(gpu->id)))
|
||||
TEST_CHECK_GOTO(uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu->parent), done);
|
||||
else
|
||||
TEST_CHECK_GOTO(!uvm_cpu_chunk_get_gpu_phys_addr(split_chunks[j], gpu->parent), done);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
for (i = 0; i < num_split_chunks; i++) {
|
||||
if (split_chunks[i])
|
||||
uvm_cpu_chunk_free(split_chunks[i]);
|
||||
}
|
||||
|
||||
done_free:
|
||||
if (chunk)
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
||||
uvm_kvfree(split_chunks);
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS test_cpu_chunk_free(uvm_va_space_t *va_space, uvm_processor_mask_t *test_gpus)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
uvm_chunk_sizes_mask_t alloc_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
size_t size = uvm_chunk_find_next_size(alloc_sizes, PAGE_SIZE);
|
||||
|
||||
for_each_chunk_size_from(size, alloc_sizes) {
|
||||
TEST_NV_CHECK_RET(test_cpu_chunk_alloc(size, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE, &chunk));
|
||||
TEST_NV_CHECK_RET(do_test_cpu_chunk_free(chunk, va_space, test_gpus));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
uvm_processor_mask_t test_gpus;
|
||||
uvm_gpu_t *gpu;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
uvm_processor_mask_and(&test_gpus,
|
||||
&va_space->registered_gpus,
|
||||
&va_space->accessible_from[uvm_id_value(UVM_ID_CPU)]);
|
||||
|
||||
for_each_va_space_gpu_in_mask(gpu, va_space, &test_gpus) {
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty(gpu), done);
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, &test_gpus), done);
|
||||
|
||||
if (uvm_processor_mask_get_gpu_count(&test_gpus) >= 3) {
|
||||
uvm_gpu_t *gpu2, *gpu3;
|
||||
|
||||
gpu = uvm_processor_mask_find_first_va_space_gpu(&test_gpus, va_space);
|
||||
gpu2 = uvm_processor_mask_find_next_va_space_gpu(&test_gpus, va_space, gpu);
|
||||
gpu3 = uvm_processor_mask_find_next_va_space_gpu(&test_gpus, va_space, gpu2);
|
||||
TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
|
||||
}
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
return status;
|
||||
}
|
||||
|
@ -141,7 +141,7 @@ static NV_STATUS split_span_as_needed(uvm_va_space_t *va_space,
|
||||
return split_as_needed(va_space, end_addr, split_needed_cb, data);
|
||||
}
|
||||
|
||||
static bool preferred_location_is_split_needed(uvm_va_policy_t *policy, void *data)
|
||||
static bool preferred_location_is_split_needed(const uvm_va_policy_t *policy, void *data)
|
||||
{
|
||||
uvm_processor_id_t processor_id;
|
||||
|
||||
@ -152,12 +152,13 @@ static bool preferred_location_is_split_needed(uvm_va_policy_t *policy, void *da
|
||||
}
|
||||
|
||||
static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_processor_id_t preferred_location = policy->preferred_location;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
const uvm_page_mask_t *mapped_mask;
|
||||
@ -185,7 +186,7 @@ static NV_STATUS preferred_location_unmap_remote_pages(uvm_va_block_t *va_block,
|
||||
status = uvm_va_block_unmap(va_block,
|
||||
va_block_context,
|
||||
preferred_location,
|
||||
uvm_va_block_region_from_block(va_block),
|
||||
region,
|
||||
&va_block_context->caller_page_mask,
|
||||
&local_tracker);
|
||||
|
||||
@ -200,17 +201,15 @@ done:
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
// TODO: Bug 1750144: remove this restriction when HMM handles setting
|
||||
// the preferred location semantics instead of just recording the policy.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
uvm_va_block_mark_cpu_dirty(va_block);
|
||||
|
||||
return preferred_location_unmap_remote_pages(va_block, va_block_context);
|
||||
return preferred_location_unmap_remote_pages(va_block, va_block_context, region);
|
||||
}
|
||||
|
||||
static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
@ -278,7 +277,7 @@ static NV_STATUS preferred_location_set(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
return uvm_hmm_set_preferred_location(va_space, preferred_location, base, last_address);
|
||||
return uvm_hmm_set_preferred_location(va_space, preferred_location, base, last_address, out_tracker);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_api_set_preferred_location(const UVM_SET_PREFERRED_LOCATION_PARAMS *params, struct file *filp)
|
||||
@ -405,20 +404,22 @@ NV_STATUS uvm_api_unset_preferred_location(const UVM_UNSET_PREFERRED_LOCATION_PA
|
||||
return status == NV_OK ? tracker_status : status;
|
||||
}
|
||||
|
||||
static NV_STATUS va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
|
||||
NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS tracker_status;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
status = uvm_va_block_add_mappings(va_block,
|
||||
va_block_context,
|
||||
processor_id,
|
||||
uvm_va_block_region_from_block(va_block),
|
||||
region,
|
||||
NULL,
|
||||
UvmEventMapRemoteCausePolicy);
|
||||
|
||||
@ -432,6 +433,7 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t processor_id)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
|
||||
NV_STATUS status;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
@ -443,10 +445,12 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
|
||||
return NV_OK;
|
||||
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, NULL,
|
||||
va_block_set_accessed_by_locked(va_block,
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
|
||||
NULL,
|
||||
uvm_va_block_set_accessed_by_locked(va_block,
|
||||
va_block_context,
|
||||
processor_id,
|
||||
region,
|
||||
&local_tracker));
|
||||
|
||||
// TODO: Bug 1767224: Combine all accessed_by operations into single tracker
|
||||
@ -463,7 +467,7 @@ typedef struct
|
||||
bool set_bit;
|
||||
} accessed_by_split_params_t;
|
||||
|
||||
static bool accessed_by_is_split_needed(uvm_va_policy_t *policy, void *data)
|
||||
static bool accessed_by_is_split_needed(const uvm_va_policy_t *policy, void *data)
|
||||
{
|
||||
accessed_by_split_params_t *params = (accessed_by_split_params_t*)data;
|
||||
|
||||
@ -560,7 +564,8 @@ static NV_STATUS accessed_by_set(uvm_va_space_t *va_space,
|
||||
processor_id,
|
||||
set_bit,
|
||||
base,
|
||||
last_address);
|
||||
last_address,
|
||||
&local_tracker);
|
||||
|
||||
done:
|
||||
tracker_status = uvm_tracker_wait_deinit(&local_tracker);
|
||||
@ -641,7 +646,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
uvm_processor_id_t processor_id;
|
||||
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
const uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_processor_id_t preferred_location = policy->preferred_location;
|
||||
uvm_processor_mask_t accessed_by = policy->accessed_by;
|
||||
|
||||
@ -703,9 +708,10 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
|
||||
|
||||
// 2- Re-establish SetAccessedBy mappings
|
||||
for_each_id_in_mask(processor_id, &accessed_by) {
|
||||
status = va_block_set_accessed_by_locked(va_block,
|
||||
status = uvm_va_block_set_accessed_by_locked(va_block,
|
||||
va_block_context,
|
||||
processor_id,
|
||||
block_region,
|
||||
out_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -738,7 +744,7 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
|
||||
return status;
|
||||
}
|
||||
|
||||
static bool read_duplication_is_split_needed(uvm_va_policy_t *policy, void *data)
|
||||
static bool read_duplication_is_split_needed(const uvm_va_policy_t *policy, void *data)
|
||||
{
|
||||
uvm_read_duplication_policy_t new_policy;
|
||||
|
||||
|
@ -314,6 +314,14 @@ static bool uvm_id_equal(uvm_processor_id_t id1, uvm_processor_id_t id2)
|
||||
return id1.val == id2.val;
|
||||
}
|
||||
|
||||
static int uvm_global_id_cmp(uvm_global_processor_id_t id1, uvm_global_processor_id_t id2)
|
||||
{
|
||||
UVM_GLOBAL_ID_CHECK_BOUNDS(id1);
|
||||
UVM_GLOBAL_ID_CHECK_BOUNDS(id2);
|
||||
|
||||
return UVM_CMP_DEFAULT(id1.val, id2.val);
|
||||
}
|
||||
|
||||
static bool uvm_global_id_equal(uvm_global_processor_id_t id1, uvm_global_processor_id_t id2)
|
||||
{
|
||||
UVM_GLOBAL_ID_CHECK_BOUNDS(id1);
|
||||
|
@ -447,16 +447,16 @@ NvU64 *uvm_push_timestamp(uvm_push_t *push)
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
bool uvm_push_method_validate(uvm_push_t *push, NvU8 subch, NvU32 method_address, NvU32 method_data)
|
||||
bool uvm_push_method_is_valid(uvm_push_t *push, NvU8 subch, NvU32 method_address, NvU32 method_data)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
|
||||
|
||||
if (subch == UVM_SUBCHANNEL_CE)
|
||||
return gpu->parent->ce_hal->method_validate(push, method_address, method_data);
|
||||
return gpu->parent->ce_hal->method_is_valid(push, method_address, method_data);
|
||||
else if (subch == UVM_SUBCHANNEL_HOST)
|
||||
return gpu->parent->host_hal->method_validate(push, method_address, method_data);
|
||||
return gpu->parent->host_hal->method_is_valid(push, method_address, method_data);
|
||||
else if (subch == UVM_SW_OBJ_SUBCHANNEL)
|
||||
return gpu->parent->host_hal->sw_method_validate(push, method_address, method_data);
|
||||
return gpu->parent->host_hal->sw_method_is_valid(push, method_address, method_data);
|
||||
|
||||
UVM_ERR_PRINT("Unsupported subchannel 0x%x\n", subch);
|
||||
return false;
|
||||
|
@ -149,7 +149,8 @@ struct uvm_push_info_struct
|
||||
char description[128];
|
||||
|
||||
// Procedure to be called when the corresponding push is complete.
|
||||
// This procedure is called with the UVM_LOCK_ORDER_CHANNEL spin lock held.
|
||||
// This procedure is called with the channel pool lock held, which
|
||||
// may be a spinlock.
|
||||
void (*on_complete)(void *);
|
||||
void *on_complete_data;
|
||||
};
|
||||
@ -438,7 +439,7 @@ static uvm_gpu_t *uvm_push_get_gpu(uvm_push_t *push)
|
||||
|
||||
// Validate that the given method can be pushed to the underlying channel. The
|
||||
// method contents can be used to further validate individual fields.
|
||||
bool uvm_push_method_validate(uvm_push_t *push, NvU8 subch, NvU32 method_address, NvU32 method_data);
|
||||
bool uvm_push_method_is_valid(uvm_push_t *push, NvU8 subch, NvU32 method_address, NvU32 method_data);
|
||||
|
||||
// Retrieve the push info object for a push that has already started
|
||||
static uvm_push_info_t *uvm_push_info_from_push(uvm_push_t *push)
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "clb06f.h"
|
||||
|
||||
#define HWMASK(d, r, f) DRF_MASK(NV ## d ## _ ## r ## _ ## f)
|
||||
#define HWSHIFT(d, r, f) DRF_SHIFT(NV ## d ## _ ## r ## _ ## f)
|
||||
#define HWSHIFTMASK(d, r, f) DRF_SHIFTMASK(NV ## d ## _ ## r ## _ ## f)
|
||||
#define HWSIZE(d, r, f) DRF_SIZE(NV ## d ## _ ## r ## _ ## f)
|
||||
#define HWCONST(d, r, f, c) DRF_DEF(d, _ ## r, _ ## f, _ ## c)
|
||||
@ -92,6 +93,8 @@
|
||||
#define UVM_SUBCHANNEL_C0B5 UVM_SUBCHANNEL_CE
|
||||
|
||||
#define UVM_SUBCHANNEL_C36F UVM_SUBCHANNEL_HOST
|
||||
#define UVM_SUBCHANNEL_C3B5 UVM_SUBCHANNEL_CE
|
||||
|
||||
#define UVM_SUBCHANNEL_C46F UVM_SUBCHANNEL_HOST
|
||||
|
||||
#define UVM_SUBCHANNEL_C56F UVM_SUBCHANNEL_HOST
|
||||
@ -143,7 +146,7 @@
|
||||
do { \
|
||||
__NV_PUSH_0U(subch, count, a1); \
|
||||
push->next[0] = d1; \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a1, d1), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a1, d1), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
++push->next; \
|
||||
@ -153,7 +156,7 @@
|
||||
do { \
|
||||
__UVM_ASSERT_CONTIGUOUS_METHODS(a1, a2); \
|
||||
__NV_PUSH_1U(subch, count, a1,d1); \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a2, d2), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a2, d2), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
push->next[0] = d2; \
|
||||
@ -164,7 +167,7 @@
|
||||
do { \
|
||||
__UVM_ASSERT_CONTIGUOUS_METHODS(a2, a3); \
|
||||
__NV_PUSH_2U(subch, count, a1,d1, a2,d2); \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a3, d3), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a3, d3), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
push->next[0] = d3; \
|
||||
@ -175,7 +178,7 @@
|
||||
do { \
|
||||
__UVM_ASSERT_CONTIGUOUS_METHODS(a3, a4); \
|
||||
__NV_PUSH_3U(subch, count, a1,d1, a2,d2, a3,d3); \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a4, d4), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a4, d4), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
push->next[0] = d4; \
|
||||
@ -186,7 +189,7 @@
|
||||
do { \
|
||||
__UVM_ASSERT_CONTIGUOUS_METHODS(a4, a5); \
|
||||
__NV_PUSH_4U(subch, count, a1,d1, a2,d2, a3,d3, a4,d4); \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a5, d5), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a5, d5), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
push->next[0] = d5; \
|
||||
@ -197,7 +200,7 @@
|
||||
do { \
|
||||
__UVM_ASSERT_CONTIGUOUS_METHODS(a5, a6); \
|
||||
__NV_PUSH_5U(subch, count, a1,d1, a2,d2, a3,d3, a4,d4, a5,d5); \
|
||||
UVM_ASSERT_MSG(uvm_push_method_validate(push, subch, a6, d6), \
|
||||
UVM_ASSERT_MSG(uvm_push_method_is_valid(push, subch, a6, d6), \
|
||||
"Method validation failed in channel %s\n", \
|
||||
push->channel->name); \
|
||||
push->next[0] = d6; \
|
||||
|
@ -48,40 +48,46 @@ static NvU32 get_push_end_size(uvm_channel_t *channel)
|
||||
|
||||
static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU32 push_size;
|
||||
NvU32 i;
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
for (i = 0; i < UVM_CHANNEL_TYPE_COUNT; ++i) {
|
||||
uvm_channel_type_t type;
|
||||
|
||||
for (type = 0; type < UVM_CHANNEL_TYPE_COUNT; ++type) {
|
||||
uvm_push_t push;
|
||||
NvU32 push_end_size;
|
||||
uvm_channel_type_t type = i;
|
||||
NvU32 push_size_before;
|
||||
NvU32 push_end_size_observed, push_end_size_expected;
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, type, &push, "type %u\n", (unsigned)type);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
TEST_NV_CHECK_RET(uvm_push_begin(gpu->channel_manager,
|
||||
type,
|
||||
&push,
|
||||
"type %s\n",
|
||||
uvm_channel_type_to_string(type)));
|
||||
|
||||
push_end_size = get_push_end_size(push.channel);
|
||||
push_size = uvm_push_get_size(&push);
|
||||
push_size_before = uvm_push_get_size(&push);
|
||||
uvm_push_end(&push);
|
||||
if (uvm_push_get_size(&push) - push_size != push_end_size) {
|
||||
UVM_TEST_PRINT("push_end_size incorrect, %u instead of %u for GPU %s\n",
|
||||
uvm_push_get_size(&push) - push_size,
|
||||
push_end_size,
|
||||
|
||||
push_end_size_expected = get_push_end_size(push.channel);
|
||||
push_end_size_observed = uvm_push_get_size(&push) - push_size_before;
|
||||
|
||||
if (push_end_size_observed != push_end_size_expected) {
|
||||
UVM_TEST_PRINT("push_end_size incorrect, %u instead of %u on channel type %s for GPU %s\n",
|
||||
push_end_size_observed,
|
||||
push_end_size_expected,
|
||||
uvm_channel_type_to_string(type),
|
||||
uvm_gpu_name(gpu));
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto done;
|
||||
|
||||
// The size mismatch error gets precedence over a wait error
|
||||
(void) uvm_push_wait(&push);
|
||||
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_push_wait(&push));
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
uvm_channel_manager_wait(gpu->channel_manager);
|
||||
}
|
||||
|
||||
return status;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
@ -201,6 +207,7 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
|
||||
NvU32 i;
|
||||
uvm_push_t *pushes;
|
||||
uvm_tracker_t tracker = UVM_TRACKER_INIT();
|
||||
uvm_channel_type_t channel_type = UVM_CHANNEL_TYPE_GPU_INTERNAL;
|
||||
|
||||
// As noted above, this test does unsafe things that would be detected by
|
||||
// lock tracking, opt-out.
|
||||
@ -213,9 +220,10 @@ static NV_STATUS test_concurrent_pushes(uvm_va_space_t *va_space)
|
||||
}
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
|
||||
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
|
||||
uvm_push_t *push = &pushes[i];
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_CPU_TO_GPU, push, "concurrent push %u", i);
|
||||
status = uvm_push_begin(gpu->channel_manager, channel_type, push, "concurrent push %u", i);
|
||||
TEST_CHECK_GOTO(status == NV_OK, done);
|
||||
}
|
||||
for (i = 0; i < UVM_PUSH_MAX_CONCURRENT_PUSHES; ++i) {
|
||||
@ -760,6 +768,7 @@ static NV_STATUS test_push_gpu_to_gpu(uvm_va_space_t *va_space)
|
||||
bool waive = true;
|
||||
|
||||
for_each_va_space_gpu(gpu_a, va_space) {
|
||||
|
||||
for_each_va_space_gpu(gpu_b, va_space) {
|
||||
if (can_do_peer_copies(va_space, gpu_a, gpu_b)) {
|
||||
waive = false;
|
||||
|
@ -284,12 +284,12 @@ NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_ali
|
||||
UVM_ASSERT(rm_mem);
|
||||
UVM_ASSERT(gpu);
|
||||
|
||||
// Peer mappings not supported yet
|
||||
UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);
|
||||
|
||||
if (uvm_rm_mem_mapped_on_gpu(rm_mem, gpu))
|
||||
return NV_OK;
|
||||
|
||||
// Peer mappings are not supported yet
|
||||
UVM_ASSERT(rm_mem->type == UVM_RM_MEM_TYPE_SYS);
|
||||
|
||||
gpu_owner = rm_mem->gpu_owner;
|
||||
gpu_owner_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu_owner);
|
||||
|
||||
@ -334,8 +334,9 @@ void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
|
||||
UVM_ASSERT(rm_mem);
|
||||
UVM_ASSERT(gpu);
|
||||
|
||||
// Cannot unmap from the gpu that owns the allocation.
|
||||
UVM_ASSERT_MSG(rm_mem->gpu_owner != gpu, "GPU %s\n", uvm_gpu_name(gpu));
|
||||
// The GPU owner mapping remains valid until the memory is freed.
|
||||
if (gpu == rm_mem->gpu_owner)
|
||||
return;
|
||||
|
||||
rm_mem_unmap_gpu(rm_mem, gpu);
|
||||
}
|
||||
|
@ -120,8 +120,9 @@ NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
|
||||
// Map/Unmap on UVM's internal address space of a GPU. In SR-IOV heavy the
|
||||
// operation is also applied on the GPU's proxy address space.
|
||||
//
|
||||
// Supported only for sysmem (UVM_RM_MEM_TYPE_SYS). The GPU has to be different
|
||||
// from the one the memory was originally allocated for.
|
||||
// Mapping/unmapping on the GPU owner, or mapping on an already mapped GPU, are
|
||||
// no-ops. Mapping/unmapping on a GPU different from the owner is only supported
|
||||
// for system memory.
|
||||
//
|
||||
// Locking same as uvm_rm_mem_alloc()
|
||||
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment);
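
// Illustrative caller-side sketch (not part of the driver sources): map a
// sysmem allocation on an additional GPU, read back its UVM VA, and unmap it.
// The helper name example_rm_mem_map_peer() and the default (0) alignment are
// assumptions.
static NV_STATUS example_rm_mem_map_peer(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
{
    NvU64 gpu_va;
    NV_STATUS status;

    // A no-op if gpu is the owner or the memory is already mapped on it.
    status = uvm_rm_mem_map_gpu(rm_mem, gpu, 0);
    if (status != NV_OK)
        return status;

    // The returned VA is valid in UVM's internal address space for this GPU.
    gpu_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);
    (void)gpu_va;

    // Unmapping the owner GPU would be a no-op; for any other GPU this drops
    // the mapping created above.
    uvm_rm_mem_unmap_gpu(rm_mem, gpu);

    return NV_OK;
}
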
|
@ -64,10 +64,9 @@ static NV_STATUS check_alignment(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 ali
{
// Alignment requirements only apply to mappings in the UVM-owned VA space
if (alignment != 0) {
bool is_proxy_va_space = false;
NvU64 gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, is_proxy_va_space);
NvU64 gpu_uvm_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
TEST_CHECK_RET(IS_ALIGNED(gpu_uvm_va, alignment));
}

return NV_OK;
@ -76,20 +75,51 @@ static NV_STATUS check_alignment(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 ali
static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
{
uvm_gpu_t *gpu = rm_mem->gpu_owner;
NvU64 gpu_uvm_va;
NvU64 gpu_proxy_va = 0;

// The memory should have been automatically mapped in the GPU owner
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));

gpu_uvm_va = uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu);

// In SR-IOV heavy, there are two VA spaces per GPU, so there are two
// mappings for a single rm_mem object on a GPU, even if the memory is
// located in vidmem.
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));
if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

gpu_proxy_va = uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu);
}
else {
TEST_CHECK_RET(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
}

TEST_NV_CHECK_RET(check_alignment(rm_mem, gpu, alignment));

// Explicitly mapping or unmapping to the GPU that owns the allocation is
// not allowed, so the testing related to GPU owners is simpler than that of
// other GPUs.
// Mappings are not ref counted, so additional map calls are no-ops; the
// GPU VA should remain the same for all the applicable VA spaces.
TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu, alignment));

TEST_CHECK_RET(gpu_uvm_va == uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu));

if (uvm_gpu_uses_proxy_channel_pool(gpu))
TEST_CHECK_RET(gpu_proxy_va == uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu));

// Unmapping the GPU owner is a no-op
uvm_rm_mem_unmap_gpu(rm_mem, gpu);

TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
TEST_CHECK_RET(gpu_uvm_va == uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu));

if (uvm_gpu_uses_proxy_channel_pool(gpu)) {
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
TEST_CHECK_RET(gpu_proxy_va == uvm_rm_mem_get_gpu_proxy_va(rm_mem, gpu));
}
else {
TEST_CHECK_RET(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
}

return NV_OK;
}
@ -324,13 +324,12 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_RB_TREE_RANDOM, uvm_test_rb_tree_random);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_GET_USER_SPACE_END_ADDRESS, uvm_test_get_user_space_end_address);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES, uvm_test_get_cpu_chunk_allocation_sizes);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HMM_SANITY, uvm_test_hmm_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_RANGE_INJECT_ADD_GPU_VA_SPACE_ERROR,
uvm_test_va_range_inject_add_gpu_va_space_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DESTROY_GPU_VA_SPACE_DELAY, uvm_test_destroy_gpu_va_space_delay);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HMM_INIT, uvm_test_hmm_init);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CPU_CHUNK_API, uvm_test_cpu_chunk_api);
}

return -EINVAL;
@ -187,5 +187,5 @@ NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp);
#endif
@ -616,6 +616,7 @@ typedef struct
// Array of processors which have a virtual mapping covering lookup_address.
NvProcessorUuid mapped_on[UVM_MAX_PROCESSORS]; // Out
NvU32 mapping_type[UVM_MAX_PROCESSORS]; // Out
NvU64 mapping_physical_address[UVM_MAX_PROCESSORS] NV_ALIGN_BYTES(8); // Out
NvU32 mapped_on_count; // Out

// The size of the virtual mapping covering lookup_address on each
@ -1394,16 +1395,6 @@ typedef struct
NvU32 rmStatus; // Out
} UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS;

#define UVM_TEST_HMM_SANITY UVM_TEST_IOCTL_BASE(92)
typedef struct
{
NvU64 hmm_address NV_ALIGN_BYTES(8); // In
NvU64 hmm_length NV_ALIGN_BYTES(8); // In
NvU64 uvm_address NV_ALIGN_BYTES(8); // In
NvU64 uvm_length NV_ALIGN_BYTES(8); // In
NV_STATUS rmStatus; // Out
} UVM_TEST_HMM_SANITY_PARAMS;

// Forces the next range covering the lookup_address to fail in
// uvm_va_range_add_gpu_va_space() with an out-of-memory error. Only the next
// uvm_va_range_add_gpu_va_space() will fail. Subsequent ones will succeed.
@ -1435,12 +1426,6 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED_PARAMS;

#define UVM_TEST_HMM_INIT UVM_TEST_IOCTL_BASE(97)
typedef struct
{
NV_STATUS rmStatus; // Out
} UVM_TEST_HMM_INIT_PARAMS;

#define UVM_TEST_SPLIT_INVALIDATE_DELAY UVM_TEST_IOCTL_BASE(98)
typedef struct
{
@ -1448,6 +1433,11 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS;

#define UVM_TEST_CPU_CHUNK_API UVM_TEST_IOCTL_BASE(100)
typedef struct
{
NV_STATUS rmStatus; // Out
} UVM_TEST_CPU_CHUNK_API_PARAMS;
#ifdef __cplusplus
}
#endif
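These UVM_TEST_* structs are the ioctl parameter blocks a user-space test harness passes to the driver; rmStatus is the out field. A hedged sketch of invoking the new UVM_TEST_CPU_CHUNK_API command (the device node path and the exact invocation convention are assumptions, not shown in this diff):

    // Hypothetical user-space caller of the test-only ioctl added above.
    UVM_TEST_CPU_CHUNK_API_PARAMS params = {0};
    int fd = open("/dev/nvidia-uvm", O_RDWR);  // assumed device node

    if (fd >= 0 && ioctl(fd, UVM_TEST_CPU_CHUNK_API, &params) == 0 && params.rmStatus == NV_OK)
        printf("CPU chunk API test passed\n");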
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -498,6 +498,7 @@ bool uvm_thread_context_add(uvm_thread_context_t *thread_context)
uvm_thread_context_global_init();

thread_context->task = current;
thread_context->ignore_hmm_invalidate_va_block = NULL;
table_entry = thread_context_non_interrupt_table_entry(&array_index);
return thread_context_non_interrupt_add(thread_context, table_entry, array_index);
}
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2019 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -70,6 +70,12 @@ struct uvm_thread_context_struct
// This field is ignored in interrupt paths
NvU32 array_index;

// Set if uvm_hmm_invalidate() callbacks should be ignored on this va_block.
// This needs to be set whenever the va_block lock is held and
// migrate_vma_setup() needs to be called since the "slow path" which
// calls try_to_migrate() doesn't pass the pgmap_owner.
uvm_va_block_t *ignore_hmm_invalidate_va_block;

// Pointer to enclosing node (if any) in red-black tree
//
// This field is ignored in interrupt paths
@ -208,7 +208,6 @@ static uvm_va_space_t *tools_event_tracker_va_space(uvm_tools_event_tracker_t *e
uvm_va_space_t *va_space;
UVM_ASSERT(event_tracker->uvm_file);
va_space = uvm_va_space_get(event_tracker->uvm_file);
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
return va_space;
}

@ -1614,10 +1613,10 @@ NV_STATUS uvm_api_tools_init_event_tracker(UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *
goto fail;
}

status = uvm_va_space_initialized(uvm_va_space_get(event_tracker->uvm_file));
if (status != NV_OK) {
if (!uvm_fd_va_space(event_tracker->uvm_file)) {
fput(event_tracker->uvm_file);
event_tracker->uvm_file = NULL;
status = NV_ERR_ILLEGAL_ACTION;
goto fail;
}

@ -1758,7 +1757,6 @@ static NV_STATUS tools_update_status(uvm_va_space_t *va_space)
uvm_assert_rwsem_locked_write(&g_tools_va_space_list_lock);
uvm_assert_rwsem_locked_write(&va_space->perf_events.lock);
uvm_assert_rwsem_locked_write(&va_space->tools.lock);
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);

status = tools_update_perf_events_callbacks(va_space);
if (status != NV_OK)
@ -2016,13 +2014,11 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
if (status != NV_OK)
goto exit;

if (is_write) {
block_context = uvm_va_block_context_alloc(mm);
if (!block_context) {
status = NV_ERR_NO_MEMORY;
goto exit;
}
}

stage_addr = uvm_mem_get_cpu_addr_kernel(stage_mem);
*bytes = 0;
@ -2044,11 +2040,16 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
}
}

if (mm)
uvm_down_read_mmap_lock(mm);

// The RM flavor of the lock is needed to perform ECC checks.
uvm_va_space_down_read_rm(va_space);
status = uvm_va_block_find_create_managed(va_space, target_va_start, &block);
status = uvm_va_block_find_create(va_space, UVM_ALIGN_DOWN(target_va_start, PAGE_SIZE), block_context, &block);
if (status != NV_OK) {
uvm_va_space_up_read_rm(va_space);
if (mm)
uvm_up_read_mmap_lock(mm);
goto exit;
}

@ -2070,6 +2071,22 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
status = uvm_mem_map_gpu_kernel(stage_mem, gpu);
if (status != NV_OK) {
uvm_va_space_up_read_rm(va_space);
if (mm)
uvm_up_read_mmap_lock(mm);
goto exit;
}
}

// Make sure a CPU resident page has an up to date struct page pointer.
if (uvm_va_block_is_hmm(block)) {
status = uvm_hmm_va_block_update_residency_info(block,
mm,
UVM_ALIGN_DOWN(target_va_start, PAGE_SIZE),
true);
if (status != NV_OK) {
uvm_va_space_up_read_rm(va_space);
if (mm)
uvm_up_read_mmap_lock(mm);
goto exit;
}
}
@ -2082,6 +2099,9 @@ static NV_STATUS tools_access_process_memory(uvm_va_space_t *va_space,
status = uvm_global_mask_check_ecc_error(global_gpus);

uvm_va_space_up_read_rm(va_space);
if (mm)
uvm_up_read_mmap_lock(mm);

if (status != NV_OK)
goto exit;

@ -984,14 +984,13 @@ NV_STATUS uvm_test_check_channel_va_space(UVM_TEST_CHECK_CHANNEL_VA_SPACE_PARAMS
goto out;
}

va_space = uvm_va_space_get(va_space_filp);
uvm_va_space_down_read(va_space);

// We can do this query outside of the lock, but doing it within the lock
// simplifies error handling.
status = uvm_va_space_initialized(va_space);
if (status != NV_OK)
va_space = uvm_fd_va_space(va_space_filp);
if (!va_space) {
status = NV_ERR_INVALID_ARGUMENT;
goto out;
}

uvm_va_space_down_read(va_space);

gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu || !uvm_processor_mask_test(&va_space->faultable_processors, gpu->id)) {
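The tools_access_process_memory() hunks above switch the lookup to uvm_va_block_find_create() and take the mmap_lock plus the RM flavor of the VA space lock around it, releasing both on every error path. A condensed sketch of that ordering (simplified from the code above; the unlock label is illustrative, the real code unwinds inline before each goto exit):

    // Lock-ordering sketch for the lookup path shown above (not a drop-in replacement).
    if (mm)
        uvm_down_read_mmap_lock(mm);

    // The RM flavor of the lock is needed to perform ECC checks.
    uvm_va_space_down_read_rm(va_space);

    status = uvm_va_block_find_create(va_space,
                                      UVM_ALIGN_DOWN(target_va_start, PAGE_SIZE),
                                      block_context,
                                      &block);
    if (status != NV_OK)
        goto unlock;  // every error path drops both locks, in reverse order

    // ... copy through the staging buffer and run the ECC checks ...

unlock:
    uvm_va_space_up_read_rm(va_space);
    if (mm)
        uvm_up_read_mmap_lock(mm);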
File diff suppressed because it is too large
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -43,6 +43,7 @@
#include "nv-kthread-q.h"

#include <linux/mmu_notifier.h>
#include <linux/wait.h>

// VA blocks are the leaf nodes in the uvm_va_space tree for managed allocations
// (VA ranges with type == UVM_VA_RANGE_TYPE_MANAGED):
@ -328,7 +329,7 @@ struct uvm_va_block_struct
// allocations. See uvm_pmm_sysmem.h for more details on CPU chunks.
// This member is meant to hold an opaque value indicating the CPU
// chunk storage method. For more details on CPU chunk storage,
// see uvm_cpu_chunk_storage_type_t in uvm_pmm_sysmem.c.
// see uvm_cpu_chunk_storage_type_t in uvm_va_block.c.
unsigned long chunks;

// Per-page allocation bit vector.
@ -352,12 +353,18 @@ struct uvm_va_block_struct
// UVM_PTE_BITS_CPU_WRITE is not set, the mapping is read-only.
// Otherwise, the mapping is read-write.
//
// Note that this is the maximum permissions a PTE could have, but not
// necessarily the actual current permissions of the CPU PTEs. The UVM
// driver will never change the PTEs without updating this state, but
// the kernel can downgrade our CPU mappings at any time without
// notifying the UVM driver (for example in response to user space
// calling madvise with MADV_DONTNEED).
// For managed allocations, this is the maximum permissions a PTE
// could have, but not necessarily the actual current permissions of the
// CPU PTEs. The UVM driver will never change the PTEs without updating
// this state, but the kernel can downgrade our CPU mappings at any time
// without notifying the UVM driver (for example in response to user
// space calling madvise with MADV_DONTNEED).
//
// For HMM allocations, this is the minimum permission the CPU has since
// Linux can upgrade a read-only PTE to read-write without notifying
// the UVM driver. This is why read duplication isn't currently
// supported.
// TODO: Bug 3660922: Need to handle read duplication at some point.
uvm_page_mask_t pte_bits[UVM_PTE_BITS_CPU_MAX];

// Whether the CPU has ever mapped a page on this VA block. This is
@ -452,6 +459,19 @@ struct uvm_va_block_struct
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;

// Wait queue for GPU atomic operations to system memory.
struct wait_queue_head atomic_waitq;

// Mask of pages being migrated to system memory for GPU atomic access.
// It is used so other threads don't try to migrate those pages while
// make_device_exclusive_range() is called without holding the va_block
// lock.
uvm_page_mask_t atomic_busy;

// Sequence number to tell if any changes were made to the va_block
// while not holding the block lock and calling hmm_range_fault().
unsigned long changed;

// Parent VA space pointer. It is NULL for managed blocks or if
// the HMM block is dead. This field can be read while holding the
// block lock and is only modified while holding the va_space write
@ -522,7 +542,7 @@ struct uvm_va_block_wrapper_struct
};

// Tracking needed for supporting allocation-retry of user GPU memory
typedef struct
struct uvm_va_block_retry_struct
{
// A tracker used for all allocations from PMM.
uvm_tracker_t tracker;
@ -537,7 +557,7 @@ typedef struct
// can contain chunks from multiple GPUs. All the used chunks are unpinned
// when the operation is finished with uvm_va_block_retry_deinit().
struct list_head used_chunks;
} uvm_va_block_retry_t;
};

// Module load/exit
NV_STATUS uvm_va_block_init(void);
@ -635,6 +655,12 @@ uvm_va_space_t *uvm_va_block_get_va_space_maybe_dead(uvm_va_block_t *va_block);
// is held in write mode.
uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block);

// Return true if the VA space has access counter migrations enabled and should
// remote map pages evicted to system memory. This is OK since access counters
// can pull the data back to vidmem if sufficient accesses trigger a migration.
// The caller must ensure that the VA space cannot go away.
bool uvm_va_space_map_remote_on_eviction(uvm_va_space_t *va_space);

// Dynamic cache-based allocation for uvm_va_block_context_t.
//
// See uvm_va_block_context_init() for a description of the mm parameter.
@ -663,7 +689,7 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
// This always returns true and is intended to only be used with UVM_ASSERT().
// Locking: the va_block lock must be held.
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
const uvm_va_policy_t *policy,
uvm_va_block_region_t region);

// TODO: Bug 1766480: Using only page masks instead of a combination of regions
@ -697,7 +723,7 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
//
// Allocation-retry: this operation may need to perform eviction to be able to
// allocate GPU memory successfully and if that happens,
// NV_WARN_MORE_PROCESSING_REQUIRED will be returned. That also means that the
// NV_ERR_MORE_PROCESSING_REQUIRED will be returned. That also means that the
// block's lock has been unlocked and relocked as part of the call and that the
// whole sequence of operations performed under the block's lock needs to be
// attempted again. To facilitate that, the caller needs to provide the same
@ -707,14 +733,15 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// caller.
//
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry of
// user memory is guaranteed not to happen. Allocation-retry of page tables can
// still occur though.
// user memory is guaranteed not to happen. Allocation-retry of GPU page tables
// can still occur though.
//
// va_block_context must not be NULL. This function will set a bit in
// va_block_context->make_resident.pages_changed_residency for each page that
// changed residency (due to a migration or first population) as a result of the
// operation. This function only sets bits in that mask. It is the caller's
// responsiblity to zero the mask or not first.
// operation and va_block_context->make_resident.all_involved_processors for
// each processor involved in the copy. This function only sets bits in those
// masks. It is the caller's responsiblity to zero the masks or not first.
//
// va_block_context->policy must also be set by the caller for the given region.
// See the comments for uvm_va_block_check_policy_is_valid().
@ -723,6 +750,8 @@ bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
// have been unlocked and relocked.
//
// LOCKING: The caller must hold the va_block lock.
// If va_block_context->mm != NULL, va_block_context->mm->mmap_lock must be
// held in at least read mode.
NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
@ -743,8 +772,6 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
// are broken
// - Only managed va_blocks are supported.
// TODO: Bug 3660922: need to implement HMM read duplication support.
// - LOCKING: If va_block_context->mm != NULL, va_block_context->mm->mmap_lock
// must be held in at least read mode.
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
@ -756,15 +783,19 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,

// Similar to uvm_va_block_make_resident() (read documentation there). The
// difference is that source pages are only copied to the destination and the
// residency is not updated until uvm_va_block_make_resident_post() is called.
// Otherwise, the combination of uvm_va_block_make_resident_pre() and
// uvm_va_block_make_resident_post() should be the same as just calling
// uvm_va_block_make_resident().
// residency is not updated until uvm_va_block_make_resident_finish() is called.
// Otherwise, the combination of uvm_va_block_make_resident_copy() and
// uvm_va_block_make_resident_finish() is the same as just calling
// uvm_va_block_make_resident(). Note, however, that the va_block lock must be
// held across the two calls for the operation to be complete. The va_block
// lock can be dropped after calling uvm_va_block_make_resident_copy() but
// uvm_va_block_make_resident_copy() must be called again after relocking the
// va_block lock and before calling uvm_va_block_make_resident_finish().
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
// so that when migrate_vma_pages() indicates a page is not migrating, the
// va_block state is not updated.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
NV_STATUS uvm_va_block_make_resident_copy(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
@ -774,10 +805,10 @@ NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
uvm_make_resident_cause_t cause);

// The page_mask must be the same or a subset of the page_mask passed to
// uvm_va_block_make_resident_pre(). This step updates the residency and breaks
// uvm_va_block_make_resident_copy(). This step updates the residency and breaks
// read duplication.
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
void uvm_va_block_make_resident_finish(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask);
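A minimal sketch of the two-phase pattern documented above for the renamed copy/finish API, assuming the caller already holds the va_block lock and has prepared the retry state, context, region and masks (all local names here are placeholders, not taken from the commit):

    // Hypothetical caller of the split make-resident API described above.
    status = uvm_va_block_make_resident_copy(va_block,
                                             &va_block_retry,
                                             va_block_context,
                                             dest_id,
                                             region,
                                             page_mask,
                                             prefetch_page_mask,
                                             cause);
    if (status != NV_OK)
        return status;

    // migrate_vma_pages() (or an equivalent step) runs here and may report pages
    // that did not migrate; the va_block lock must be held again before finishing.

    // pages_migrated_mask must be the same mask, or a subset of the mask, passed
    // to the copy stage.
    uvm_va_block_make_resident_finish(va_block,
                                      va_block_context,
                                      region,
                                      pages_migrated_mask);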
@ -905,7 +936,8 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
//
// LOCKING: The caller must hold the VA block lock.
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context);
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);

// Maps the given processor to all resident pages in this block, as allowed by
// location and policy. Waits for the operation to complete before returning.
@ -921,6 +953,22 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id);

// Maps given processor to all resident pages in this block and region, as
// allowed by location and policy. The caller is responsible for waiting for
// the tracker after all mappings have been started.
// This function can be called with HMM and managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock and
// va_block_context->mm->mmap_lock must be held in at least read mode.
NV_STATUS uvm_va_block_set_accessed_by_locked(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_va_block_region_t region,
uvm_tracker_t *out_tracker);

// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
@ -1124,6 +1172,11 @@ void uvm_va_block_unmap_preferred_location_uvm_lite(uvm_va_block_t *va_block, uv
// must hold mm->mmap_lock in at least read mode.
void uvm_va_block_unregister_gpu(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);

// Same as uvm_va_block_unregister_gpu() but the VA block lock must be held.
// Note that this handles allocation-retry internally and hence might unlock
// and relock block's lock.
void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm);

// Unmaps all memory associated with the block and drops the ref count of the
// block. This allows the caller to free resources associated with this block
// regardless of the block's current ref count. Most importantly it allows the
@ -1186,9 +1239,13 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,

// Performs any operations necessary to establish a coherent mapping
// (migrations, cache invalidates, etc.) in response to the given service block
// context
// context.
//
// service_context->block_context.policy is set by this function.
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
@ -1197,8 +1254,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// - va_block lock must be held
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock was
// unlocked and relocked.
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
// was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
@ -1209,6 +1266,77 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
uvm_va_block_retry_t *block_retry,
uvm_service_block_context_t *service_context);

// Performs population of the destination pages, unmapping and copying source
// pages to new_residency.
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
// read mode, if valid.
// - va_space lock must be held in at least read mode
// - va_block lock must be held
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
// was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
// Any other error code different than NV_OK indicates OOM or a global fatal
// error.
NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
uvm_processor_id_t new_residency,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *block_retry,
uvm_service_block_context_t *service_context);

// This updates the va_block residency state and maps the faulting processor_id
// to the new residency (which may be remote).
//
// service_context must not be NULL and service_context->block_context.policy
// must be valid. See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
// read mode, if valid.
// - va_space lock must be held in at least read mode
// - va_block lock must be held
// - the mmap lock and va_space lock must be held across the calls to
// uvm_va_block_service_copy() and this function. If the va_block lock is
// dropped inbetween, special care is needed to check for eviction and
// invalidation callbacks.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the block's lock
// was unlocked and relocked.
//
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that thrashing has been detected
// and the performance heuristics logic decided to throttle execution.
// Any other error code different than NV_OK indicates OOM or a global fatal
// error.
NV_STATUS uvm_va_block_service_finish(uvm_processor_id_t processor_id,
uvm_va_block_t *va_block,
uvm_service_block_context_t *service_context);
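As with make-resident, fault servicing is now split into a copy stage and a finish stage. A hedged sketch of chaining them per the locking notes above (variable names are placeholders):

    // Hypothetical two-stage servicing call mirroring the documentation above.
    status = uvm_va_block_service_copy(processor_id, new_residency, va_block, &block_retry, service_context);
    if (status != NV_OK)
        return status;  // includes NV_ERR_MORE_PROCESSING_REQUIRED after allocation-retry

    // The mmap_lock and va_space lock must stay held across both calls; if the
    // va_block lock was dropped in between, recheck for eviction/invalidation.
    status = uvm_va_block_service_finish(processor_id, va_block, service_context);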
// Allocate GPU state for the given va_block and registered GPUs.
// Locking: The block lock must be held.
NV_STATUS uvm_va_block_gpu_state_alloc(uvm_va_block_t *va_block);

// Release any GPU or policy data associated with the given region in response
// to munmap().
// Locking: The va_block lock must be held.
void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region);

// Size of the block in bytes. Guaranteed to be a page-aligned value between
// PAGE_SIZE and UVM_VA_BLOCK_SIZE.
static inline NvU64 uvm_va_block_size(uvm_va_block_t *block)
@ -1275,7 +1403,7 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
// VA block lookup functions. There are a number of permutations which might be
// useful, such as looking up the block from {va_space, va_range} x {addr,
// block index}. The ones implemented here and in uvm_va_range.h support the
// primary two use cases, which are:
// primary three use cases, which are:
// 1) Iterating over all VA blocks in a VA range. This uses block indices on the
// VA range:
// uvm_va_range_num_blocks
@ -1286,6 +1414,9 @@ const uvm_page_mask_t *uvm_va_block_map_mask_get(uvm_va_block_t *block, uvm_proc
// VA space and address:
// uvm_va_block_find
// uvm_va_block_find_create
// 3) Operating on a single VA block (fault). This looks up the block using the
// supplied VA range and address:
// uvm_va_block_find_create_in_range

// Finds the UVM or HMM VA block containing addr, if any. The va_space->lock
// must be held in at least read mode. Return values:
@ -1315,6 +1446,15 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);

// Same as uvm_va_block_find_create except that va_range lookup was already done
// by the caller. If the supplied va_range is NULL, this function behaves just
// like when the va_range lookup in uvm_va_block_find_create is NULL.
NV_STATUS uvm_va_block_find_create_in_range(uvm_va_space_t *va_space,
uvm_va_range_t *va_range,
NvU64 addr,
uvm_va_block_context_t *va_block_context,
uvm_va_block_t **out_block);

// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
@ -1324,15 +1464,10 @@ static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}

// Look up a chunk backing a specific address within the VA block. Returns NULL if none.
// Look up a chunk backing a specific address within the VA block.
// Returns NULL if none.
uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu_t *gpu, NvU64 address);

typedef enum
{
UVM_MIGRATE_MODE_MAKE_RESIDENT,
UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
} uvm_migrate_mode_t;

// Implementation of the UvmMigrate() API at the VA block scope.
//
// The out_tracker can be NULL.
@ -1345,6 +1480,8 @@ typedef enum
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
// If va_block is a HMM block, va_block_context->hmm.vma must be valid.
// See the comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
// NULL, va_block_context->mm->mmap_lock must be held in at least
@ -1362,7 +1499,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy.
// va_block_context->policy or va_block_context->hmm.vma.
//
// The caller needs to support allocation-retry of page tables.
//
@ -1416,6 +1553,8 @@ static uvm_page_index_t uvm_va_block_cpu_page_index(uvm_va_block_t *va_block, Nv

// Computes the size and index in the gpu_state chunks array of the GPU chunk
// which corresponds to the given page_index of the VA region.
// Note this is only used for testing and does not work on HMM va_blocks as it
// returns incorrect results for those.
size_t uvm_va_block_gpu_chunk_index_range(NvU64 start,
NvU64 size,
uvm_gpu_t *gpu,
@ -1868,77 +2007,66 @@ static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
// Helpers for page state (permissions, size, residency)
//

// Compute the gpus that have at least the given access permissions for the
// range described by region and page_mask. The function sets the bit if any
// page in the region has the permissions.
void uvm_va_block_region_authorized_gpus(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_gpus);

// Compute the processors that have at least the given access permissions for the
// range described by region and page_mask. The function sets the bit if any
// page in the region has the permissions.
void uvm_va_block_region_authorized_processors(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_processors);

void uvm_va_block_page_authorized_gpus(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_gpus);

void uvm_va_block_page_authorized_processors(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_prot_t access_permission,
uvm_processor_mask_t *authorized_processors);

bool uvm_va_block_is_gpu_authorized_on_whole_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_gpu_id_t gpu_id,
uvm_prot_t required_prot);

bool uvm_va_block_is_processor_authorized_on_whole_region(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_processor_id_t processor_id,
uvm_prot_t required_prot);

bool uvm_va_block_page_is_gpu_authorized(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_gpu_id_t gpu_id,
uvm_prot_t required_prot);

bool uvm_va_block_page_is_processor_authorized(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
uvm_prot_t required_prot);

// Compute the gpus that have a copy of the given page resident in their memory
void uvm_va_block_page_resident_gpus(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_mask_t *resident_gpus);

// Compute the processors that have a copy of the given page resident in their memory
// Compute the processors that have a copy of the given page resident in their
// memory.
void uvm_va_block_page_resident_processors(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_mask_t *resident_processors);

// Count how many processors have a copy of the given page resident in their memory
// Count how many processors have a copy of the given page resident in their
// memory.
NvU32 uvm_va_block_page_resident_processors_count(uvm_va_block_t *va_block, uvm_page_index_t page_index);

// Get the processor with a resident copy of a page closest to the given processor
// Get the processor with a resident copy of a page closest to the given
// processor.
uvm_processor_id_t uvm_va_block_page_get_closest_resident(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor);

uvm_processor_id_t uvm_va_block_page_get_closest_resident_in_mask(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t processor,
const uvm_processor_mask_t *processor_mask);
// Insert a CPU chunk at the given page_index into the va_block.
// Locking: The va_block lock must be held.
NV_STATUS uvm_cpu_chunk_insert_in_block(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);

// Remove a CPU chunk at the given page_index from the va_block.
// Locking: The va_block lock must be held.
void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index from the va_block.
// Locking: The va_block lock must be held.
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

// Return the CPU chunk at the given page_index from the va_block.
// Locking: The va_block lock must be held.
struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);

// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);

// Get CPU page size or 0 if it is not mapped
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

// Get GPU page size or 0 if it is not mapped on the given GPU
NvU32 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@ -1999,14 +2127,14 @@ size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t pa
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_va_block_context_vma_is_valid() in uvm_hmm.h.
// and uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_index_t page_index,
uvm_processor_id_t processor_id,
NvU32 access_type_mask,
uvm_va_policy_t *policy,
const uvm_va_policy_t *policy,
const uvm_perf_thrashing_hint_t *thrashing_hint,
uvm_service_operation_t operation,
bool *read_duplicate);
@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2019 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,6 +27,11 @@
#include "uvm_common.h"
#include "uvm_pte_batch.h"
#include "uvm_tlb_batch.h"
#include "uvm_forward_decl.h"

#if UVM_IS_CONFIG_HMM()
#include <linux/migrate.h>
#endif

// UVM_VA_BLOCK_BITS is 21, meaning the maximum block size is 2MB. Rationale:
// - 2MB matches the largest Pascal GPU page size so it's a natural fit
@ -161,8 +166,12 @@ typedef struct
{
// Masks used internally
uvm_page_mask_t page_mask;
uvm_page_mask_t copy_resident_pages_between_mask;
uvm_page_mask_t copy_resident_pages_mask;
uvm_page_mask_t pages_staged;

// This is used to store which pages were successfully copied to the
// destination processor and used by uvm_va_block_make_resident_finish()
// to update the va_block state.
uvm_page_mask_t pages_migrated;

// Out mask filled in by uvm_va_block_make_resident to indicate which
@ -225,13 +234,24 @@ typedef struct
// the mm, such as creating CPU mappings.
struct mm_struct *mm;

uvm_va_policy_t *policy;
const uvm_va_policy_t *policy;

#if UVM_IS_CONFIG_HMM()
struct
{
// These are used for migrate_vma_*(), hmm_range_fault(), and
// make_device_exclusive_range() handling.
unsigned long src_pfns[PAGES_PER_UVM_VA_BLOCK];
union {
unsigned long dst_pfns[PAGES_PER_UVM_VA_BLOCK];
struct page *pages[PAGES_PER_UVM_VA_BLOCK];
};

// Cached VMA pointer. This is only valid while holding the mmap_lock.
struct vm_area_struct *vma;

// Used for migrate_vma_*() to migrate pages to/from GPU/CPU.
struct migrate_vma migrate_vma_args;
} hmm;
#endif

@ -242,8 +262,7 @@ typedef struct
typedef enum
{
UVM_VA_BLOCK_TRANSFER_MODE_MOVE = 1,
UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2,
UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY = 3
UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2
} uvm_va_block_transfer_mode_t;

struct uvm_reverse_map_struct
@ -266,4 +285,10 @@ typedef enum
UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
} uvm_service_operation_t;

typedef enum
{
UVM_MIGRATE_MODE_MAKE_RESIDENT,
UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
} uvm_migrate_mode_t;

#endif
@ -29,23 +29,23 @@
#include "uvm_va_space.h"
#include "uvm_va_range.h"

uvm_va_policy_t uvm_va_policy_default __read_mostly = {
const uvm_va_policy_t uvm_va_policy_default = {
.preferred_location = UVM_ID_INVALID,
.read_duplication = UVM_READ_DUPLICATION_UNSET,
};

bool uvm_va_policy_is_read_duplicate(uvm_va_policy_t *policy, uvm_va_space_t *va_space)
bool uvm_va_policy_is_read_duplicate(const uvm_va_policy_t *policy, uvm_va_space_t *va_space)
{
return policy->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
uvm_va_space_can_read_duplicate(va_space, NULL);
}

uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
{
uvm_assert_mutex_locked(&va_block->lock);

if (uvm_va_block_is_hmm(va_block)) {
uvm_va_policy_node_t *node = uvm_va_policy_node_find(va_block, addr);
const uvm_va_policy_node_t *node = uvm_va_policy_node_find(va_block, addr);

return node ? &node->policy : &uvm_va_policy_default;
}
@ -63,14 +63,6 @@ static uvm_va_policy_node_t *uvm_va_policy_node_container(uvm_range_tree_node_t
return container_of(tree_node, uvm_va_policy_node_t, node);
}

static uvm_va_policy_t *uvm_va_policy_container(uvm_range_tree_node_t *tree_node)
{
if (!tree_node)
return NULL;

return &uvm_va_policy_node_container(tree_node)->policy;
}

NV_STATUS uvm_va_policy_init(void)
{
g_uvm_va_policy_node_cache = NV_KMEM_CACHE_CREATE("uvm_va_policy_node_t", uvm_va_policy_node_t);
@ -173,7 +165,7 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block,
return uvm_va_policy_node_container(tree_node);
}

uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
const uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
NvU64 start,
NvU64 end,
uvm_va_policy_node_t **out_node,
@ -181,7 +173,7 @@ uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
{
uvm_range_tree_node_t *tree_node;
uvm_va_policy_node_t *node;
uvm_va_policy_t *policy;
const uvm_va_policy_t *policy;
uvm_va_block_region_t region;

UVM_ASSERT(uvm_va_block_is_hmm(va_block));
@ -219,8 +211,8 @@ uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
return policy;
}

uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
const uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,
const uvm_va_policy_t *policy,
NvU64 end,
uvm_va_policy_node_t **inout_node,
uvm_va_block_region_t *inout_region)
@ -234,7 +226,7 @@ uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,

next = uvm_va_policy_node_iter_next(va_block, node, end);

if (policy == &uvm_va_policy_default) {
if (uvm_va_policy_is_default(policy)) {
// We haven't used the current policy node yet so use it now.
next = node;
policy = &node->policy;
@ -563,4 +555,39 @@ NV_STATUS uvm_va_policy_set_range(uvm_va_block_t *va_block,
return NV_OK;
}

const uvm_va_policy_t *uvm_va_policy_set_preferred_location(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
uvm_processor_id_t processor_id,
const uvm_va_policy_t *old_policy)
{
NvU64 start = uvm_va_block_region_start(va_block, region);
NvU64 end = uvm_va_block_region_end(va_block, region);
uvm_va_policy_node_t *node;

if (uvm_va_policy_is_default(old_policy)) {

UVM_ASSERT(!UVM_ID_IS_INVALID(processor_id));
UVM_ASSERT(!uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end));

node = uvm_va_policy_node_create(va_block, start, end);
if (!node)
return NULL;
}
else {
// Since the old_policy isn't the constant default policy, we know it
// is an allocated uvm_va_policy_node_t and can be cast.
node = container_of((uvm_va_policy_t *)old_policy, uvm_va_policy_node_t, policy);

// The caller guarantees that the policy node doesn't require splitting
// and that the policy is changing.
UVM_ASSERT(node->node.start >= start);
UVM_ASSERT(node->node.end <= end);
UVM_ASSERT(!uvm_id_equal(node->policy.preferred_location, processor_id));
}

node->policy.preferred_location = processor_id;

return &node->policy;
}

#endif // UVM_IS_CONFIG_HMM()
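The constified iterator API above is normally used to walk every policy overlapping a range of an HMM va_block, with uvm_va_policy_is_default() telling the shared default policy apart from allocated policy nodes. A short sketch under that assumption (the locals are placeholders, not taken from the commit):

    // Hypothetical walk over the policies covering [start, end] of an HMM block.
    uvm_va_policy_node_t *node;
    uvm_va_block_region_t region;
    const uvm_va_policy_t *policy;

    for (policy = uvm_va_policy_iter_first(va_block, start, end, &node, &region);
         policy;
         policy = uvm_va_policy_iter_next(va_block, policy, end, &node, &region)) {
        if (!uvm_va_policy_is_default(policy) && uvm_va_policy_is_read_duplicate(policy, va_space)) {
            // Handle a read-duplicated sub-region; 'region' covers the pages
            // this policy applies to within the block.
        }
    }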
Some files were not shown because too many files have changed in this diff.