520.61.05

Andy Ritger 2022-10-10 14:59:24 -07:00
parent fe0728787f
commit 90eb10774f
No known key found for this signature in database
GPG Key ID: 6D466BB75E006CFC
758 changed files with 88383 additions and 26493 deletions

View File

@ -1,5 +1,19 @@
# Changelog
## Release 520 Entries
### [520.61.05] 2022-10-10
#### Added
- Introduce support for NVIDIA H100 GPUs.
#### Fixed
- Fix/Improve Makefile, [#308](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/308/) by @izenynn
- Make nvLogBase2 more efficient, [#177](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/177/) by @DMaroo
- nv-pci: fixed always true expression, [#195](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/195/) by @ValZapod
## Release 515 Entries
### [515.76] 2022-09-20

View File

@ -6,9 +6,9 @@
# To install the build kernel modules: run (as root) `make modules_install`
###########################################################################
-include utils.mk
-all: modules
+###########################################################################
+# variables
+###########################################################################
nv_kernel_o = src/nvidia/$(OUTPUTDIR)/nv-kernel.o
nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
@ -16,13 +16,20 @@ nv_kernel_o_binary = kernel-open/nvidia/nv-kernel.o_binary
nv_modeset_kernel_o = src/nvidia-modeset/$(OUTPUTDIR)/nv-modeset-kernel.o
nv_modeset_kernel_o_binary = kernel-open/nvidia-modeset/nv-modeset-kernel.o_binary
-.PHONY: $(nv_kernel_o) $(nv_modeset_kernel_o) modules modules_install
+###########################################################################
+# rules
+###########################################################################
+include utils.mk
+.PHONY: all
+all: modules
###########################################################################
# nv-kernel.o is the OS agnostic portion of nvidia.ko
###########################################################################
+.PHONY: $(nv_kernel_o)
$(nv_kernel_o):
$(MAKE) -C src/nvidia
@ -34,6 +41,7 @@ $(nv_kernel_o_binary): $(nv_kernel_o)
# nv-modeset-kernel.o is the OS agnostic portion of nvidia-modeset.ko
###########################################################################
+.PHONY: $(nv_modeset_kernel_o)
$(nv_modeset_kernel_o):
$(MAKE) -C src/nvidia-modeset
@ -46,31 +54,33 @@ $(nv_modeset_kernel_o_binary): $(nv_modeset_kernel_o)
# the kernel modules with kbuild.
###########################################################################
+.PHONY: modules
modules: $(nv_kernel_o_binary) $(nv_modeset_kernel_o_binary)
$(MAKE) -C kernel-open modules
###########################################################################
# Install the built kernel modules using kbuild.
###########################################################################
+.PHONY: modules_install
modules_install:
$(MAKE) -C kernel-open modules_install
###########################################################################
# clean
###########################################################################
-.PHONY: clean nvidia.clean nvidia-modeset.clean kernel-open.clean
+.PHONY: clean
clean: nvidia.clean nvidia-modeset.clean kernel-open.clean
+.PHONY: nvidia.clean
nvidia.clean:
$(MAKE) -C src/nvidia clean
+.PHONY: nvidia-modeset.clean
nvidia-modeset.clean:
$(MAKE) -C src/nvidia-modeset clean
+.PHONY: kernel-open.clean
kernel-open.clean:
$(MAKE) -C kernel-open clean

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 515.76.
+version 520.61.05.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with gsp.bin
firmware and user-space NVIDIA GPU driver components from a corresponding
-515.76 driver release. This can be achieved by installing
+520.61.05 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 515.76 release,
+(see the table below). However, in the 520.61.05 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
-https://us.download.nvidia.com/XFree86/Linux-x86_64/515.76/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/520.61.05/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"515.76\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.61.05\"
EXTRA_CFLAGS += -Wno-unused-function

View File

@ -101,13 +101,6 @@
# define NV_ANDROID
#endif
#if defined(DceCore) && !defined(NV_DCECORE)
# define NV_DCECORE
#endif
@ -355,15 +348,6 @@
#define NVOS_IS_INTEGRITY 0
#endif
#if defined(NVCPU_X86)
#define NVCPU_IS_X86 1
#else

View File

@ -1037,6 +1037,32 @@ static inline vm_fault_t nv_insert_pfn(struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
}
/* Converts BAR index to Linux specific PCI BAR index */
static inline NvU8 nv_bar_index_to_os_bar_index
(
struct pci_dev *dev,
NvU8 nv_bar_index
)
{
NvU8 bar_index = 0;
NvU8 i;
BUG_ON(nv_bar_index >= NV_GPU_NUM_BARS);
for (i = 0; i < nv_bar_index; i++)
{
if (NV_PCI_RESOURCE_FLAGS(dev, bar_index) & PCI_BASE_ADDRESS_MEM_TYPE_64)
{
bar_index += 2;
}
else
{
bar_index++;
}
}
return bar_index;
}
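The helper walks the BARs below the requested one and skips an extra slot for each 64-bit memory BAR, since those occupy two Linux PCI resource entries. A hypothetical call site is sketched below, using the NV_GPU_BAR_INDEX_IMEM constant from the nv headers; the wrapper function itself is illustrative only:

/* Illustrative only: size of the Linux PCI resource backing the IMEM BAR. */
static NvU64 example_imem_bar_size(struct pci_dev *pci_dev)
{
    /* Translate the RM BAR index into the kernel's PCI resource index. */
    NvU8 os_bar_index = nv_bar_index_to_os_bar_index(pci_dev, NV_GPU_BAR_INDEX_IMEM);

    return pci_resource_len(pci_dev, os_bar_index);
}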
#define NV_PAGE_MASK (NvU64)(long)PAGE_MASK
@ -1161,16 +1187,6 @@ typedef struct nvidia_pte_s {
unsigned int page_count; unsigned int page_count;
} nvidia_pte_t; } nvidia_pte_t;
typedef struct nv_alloc_s { typedef struct nv_alloc_s {
struct nv_alloc_s *next; struct nv_alloc_s *next;
struct device *dev; struct device *dev;
@ -1413,34 +1429,6 @@ struct os_wait_queue {
struct completion q; struct completion q;
}; };
/* /*
* To report error in msi/msix when unhandled count reaches a threshold * To report error in msi/msix when unhandled count reaches a threshold
*/ */
@ -1464,19 +1452,6 @@ struct nv_dma_device {
NvBool nvlink; NvBool nvlink;
}; };
/* linux-specific version of old nv_state_t */ /* linux-specific version of old nv_state_t */
/* this is a general os-specific state structure. the first element *must* be /* this is a general os-specific state structure. the first element *must* be
the general state structure, for the generic unix-based code */ the general state structure, for the generic unix-based code */
@ -1492,11 +1467,6 @@ typedef struct nv_linux_state_s {
/* IBM-NPU info associated with this GPU */ /* IBM-NPU info associated with this GPU */
nv_ibmnpu_info_t *npu; nv_ibmnpu_info_t *npu;
/* NUMA node information for the platforms where GPU memory is presented /* NUMA node information for the platforms where GPU memory is presented
* as a NUMA node to the kernel */ * as a NUMA node to the kernel */
struct { struct {
@ -1576,23 +1546,6 @@ typedef struct nv_linux_state_s {
/* Per-device notifier block for ACPI events */ /* Per-device notifier block for ACPI events */
struct notifier_block acpi_nb; struct notifier_block acpi_nb;
/* Lock serializing ISRs for different SOC vectors */ /* Lock serializing ISRs for different SOC vectors */
nv_spinlock_t soc_isr_lock; nv_spinlock_t soc_isr_lock;
@ -1760,12 +1713,10 @@ static inline struct kmem_cache *nv_kmem_cache_create(const char *name, unsigned
return cache; return cache;
} }
#if defined(CONFIG_PCI_IOV) #if defined(CONFIG_PCI_IOV)
#define NV_PCI_SRIOV_SUPPORT #define NV_PCI_SRIOV_SUPPORT
#endif /* CONFIG_PCI_IOV */ #endif /* CONFIG_PCI_IOV */
#define NV_PCIE_CFG_MAX_OFFSET 0x1000 #define NV_PCIE_CFG_MAX_OFFSET 0x1000
#include "nv-proto.h" #include "nv-proto.h"
@ -1959,11 +1910,6 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32); NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
static inline void nv_mutex_destroy(struct mutex *lock) static inline void nv_mutex_destroy(struct mutex *lock)
{ {
mutex_destroy(lock); mutex_destroy(lock);

View File

@ -47,55 +47,37 @@ typedef int vm_fault_t;
*
*/
-#if defined(NV_GET_USER_PAGES_HAS_TASK_STRUCT)
-    #if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
-        #define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
-            get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
-    #else
-        #include <linux/mm.h>
-        #include <linux/sched.h>
-
-        static inline long NV_GET_USER_PAGES(unsigned long start,
-                                             unsigned long nr_pages,
-                                             int write,
-                                             int force,
-                                             struct page **pages,
-                                             struct vm_area_struct **vmas)
-        {
-            unsigned int flags = 0;
-
-            if (write)
-                flags |= FOLL_WRITE;
-            if (force)
-                flags |= FOLL_FORCE;
-
-            return get_user_pages(current, current->mm, start, nr_pages, flags,
-                                  pages, vmas);
-        }
-    #endif
-#else
-    #if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
-        #define NV_GET_USER_PAGES get_user_pages
-    #else
-        #include <linux/mm.h>
-
-        static inline long NV_GET_USER_PAGES(unsigned long start,
-                                             unsigned long nr_pages,
-                                             int write,
-                                             int force,
-                                             struct page **pages,
-                                             struct vm_area_struct **vmas)
-        {
-            unsigned int flags = 0;
-
-            if (write)
-                flags |= FOLL_WRITE;
-            if (force)
-                flags |= FOLL_FORCE;
-
-            return get_user_pages(start, nr_pages, flags, pages, vmas);
-        }
-    #endif
-#endif
+#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
+    #define NV_GET_USER_PAGES get_user_pages
+#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
+    #define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
+        get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
+#else
+    #include <linux/mm.h>
+    #include <linux/sched.h>
+
+    static inline long NV_GET_USER_PAGES(unsigned long start,
+                                         unsigned long nr_pages,
+                                         int write,
+                                         int force,
+                                         struct page **pages,
+                                         struct vm_area_struct **vmas)
+    {
+        unsigned int flags = 0;
+
+        if (write)
+            flags |= FOLL_WRITE;
+        if (force)
+            flags |= FOLL_FORCE;
+
+    #if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
+        return get_user_pages(current, current->mm, start, nr_pages, flags,
+                              pages, vmas);
+    #else
+        // remaining definition (NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
+        return get_user_pages(start, nr_pages, flags, pages, vmas);
+    #endif
+    }
+#endif
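Whichever variant conftest selects, callers see the same six-argument wrapper. A hypothetical sketch of a call site follows; the helper, buffer address, and page count are illustrative, and the usual get_user_pages() rule that the mm's mmap lock is held is assumed:

/*
 * Hypothetical illustration: pin a few pages of a user buffer through the
 * NV_GET_USER_PAGES wrapper and release them again.  Assumes the caller
 * holds current->mm's mmap lock, as get_user_pages() requires.
 */
static long example_pin_user_buffer(unsigned long user_addr)
{
    struct page *pages[4];
    long i, pinned;

    /* write = 1, force = 0; expands to whichever get_user_pages() variant
     * conftest selected for this kernel. */
    pinned = NV_GET_USER_PAGES(user_addr, 4, 1, 0, pages, NULL);
    if (pinned <= 0)
        return pinned;

    /* ... the pinned pages could now be mapped for DMA ... */

    for (i = 0; i < pinned; i++)
        put_page(pages[i]);

    return pinned;
}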
/*
@ -131,7 +113,7 @@ typedef int vm_fault_t;
 */
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
-    #if defined(NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS)
+    #if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
        #define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
    #else
        static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
@ -150,26 +132,21 @@ typedef int vm_fault_t;
            if (force)
                flags |= FOLL_FORCE;
-        #if defined(NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG)
-            #if defined (NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG)
-            return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
-                                         pages, vmas, NULL);
-            #else
-            return get_user_pages_remote(mm, start, nr_pages, flags,
-                                         pages, vmas, NULL);
-            #endif
-        #else
-            return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
-                                         pages, vmas);
-        #endif
+        #if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
+            return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
+                                         pages, vmas);
+        #elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
+            return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
+                                         pages, vmas, NULL);
+        #else
+            // remaining defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
+            return get_user_pages_remote(mm, start, nr_pages, flags,
+                                         pages, vmas, NULL);
+        #endif
        }
    #endif
#else
-    #if defined(NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS)
+    #if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
        #define NV_GET_USER_PAGES_REMOTE get_user_pages
    #else
        #include <linux/mm.h>

View File

@ -27,9 +27,6 @@
#include "nv-pci.h" #include "nv-pci.h"
#include "nv-register-module.h" #include "nv-register-module.h"
extern const char *nv_device_name; extern const char *nv_device_name;
extern nvidia_module_t nv_fops; extern nvidia_module_t nv_fops;

View File

@ -47,11 +47,6 @@ extern nv_cap_t *nvidia_caps_root;
extern const NvBool nv_is_rm_firmware_supported_os; extern const NvBool nv_is_rm_firmware_supported_os;
#include <nv-kernel-interface-api.h> #include <nv-kernel-interface-api.h>
/* NVIDIA's reserved major character device number (Linux). */ /* NVIDIA's reserved major character device number (Linux). */
@ -286,6 +281,7 @@ typedef struct nv_usermap_access_params_s
NvU64 access_size; NvU64 access_size;
NvU64 remap_prot_extra; NvU64 remap_prot_extra;
NvBool contig; NvBool contig;
NvU32 caching;
} nv_usermap_access_params_t; } nv_usermap_access_params_t;
/* /*
@ -303,6 +299,7 @@ typedef struct nv_alloc_mapping_context_s {
NvU64 remap_prot_extra; NvU64 remap_prot_extra;
NvU32 prot; NvU32 prot;
NvBool valid; NvBool valid;
NvU32 caching;
} nv_alloc_mapping_context_t; } nv_alloc_mapping_context_t;
typedef enum typedef enum
@ -331,6 +328,9 @@ typedef struct nv_soc_irq_info_s {
#define NV_MAX_DPAUX_NUM_DEVICES 4 #define NV_MAX_DPAUX_NUM_DEVICES 4
#define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING #define NV_MAX_SOC_DPAUX_NUM_DEVICES 2 // From SOC_DEV_MAPPING
#define NV_IGPU_LEGACY_STALL_IRQ 70
#define NV_IGPU_MAX_STALL_IRQS 3
#define NV_IGPU_MAX_NONSTALL_IRQS 1
/* /*
* per device state * per device state
*/ */
@ -367,6 +367,7 @@ typedef struct nv_state_t
nv_aperture_t *hdacodec_regs; nv_aperture_t *hdacodec_regs;
nv_aperture_t *mipical_regs; nv_aperture_t *mipical_regs;
nv_aperture_t *fb, ud; nv_aperture_t *fb, ud;
nv_aperture_t *simregs;
NvU32 num_dpaux_instance; NvU32 num_dpaux_instance;
NvU32 interrupt_line; NvU32 interrupt_line;
@ -379,6 +380,11 @@ typedef struct nv_state_t
NvU32 soc_dcb_size; NvU32 soc_dcb_size;
NvU32 disp_sw_soc_chip_id; NvU32 disp_sw_soc_chip_id;
NvU32 igpu_stall_irq[NV_IGPU_MAX_STALL_IRQS];
NvU32 igpu_nonstall_irq;
NvU32 num_stall_irqs;
NvU64 dma_mask;
NvBool primary_vga; NvBool primary_vga;
NvU32 sim_env; NvU32 sim_env;
@ -456,6 +462,9 @@ typedef struct nv_state_t
NvBool printed_openrm_enable_unsupported_gpus_error; NvBool printed_openrm_enable_unsupported_gpus_error;
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
NvBool nvpcf_dsm_in_gpu_scope;
} nv_state_t; } nv_state_t;
// These define need to be in sync with defines in system.h // These define need to be in sync with defines in system.h
@ -520,7 +529,7 @@ typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
#define NV_FLAG_USES_MSIX 0x0040 #define NV_FLAG_USES_MSIX 0x0040
#define NV_FLAG_PASSTHRU 0x0080 #define NV_FLAG_PASSTHRU 0x0080
#define NV_FLAG_SUSPENDED 0x0100 #define NV_FLAG_SUSPENDED 0x0100
-// Unused 0x0200
+#define NV_FLAG_SOC_IGPU 0x0200
// Unused 0x0400 // Unused 0x0400
#define NV_FLAG_PERSISTENT_SW_STATE 0x0800 #define NV_FLAG_PERSISTENT_SW_STATE 0x0800
#define NV_FLAG_IN_RECOVERY 0x1000 #define NV_FLAG_IN_RECOVERY 0x1000
@ -569,6 +578,9 @@ typedef enum
#define NV_IS_SOC_DISPLAY_DEVICE(nv) \ #define NV_IS_SOC_DISPLAY_DEVICE(nv) \
((nv)->flags & NV_FLAG_SOC_DISPLAY) ((nv)->flags & NV_FLAG_SOC_DISPLAY)
#define NV_IS_SOC_IGPU_DEVICE(nv) \
((nv)->flags & NV_FLAG_SOC_IGPU)
#define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \ #define NV_IS_DEVICE_IN_SURPRISE_REMOVAL(nv) \
(((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0) (((nv)->flags & NV_FLAG_IN_SURPRISE_REMOVAL) != 0)
@ -627,18 +639,12 @@ typedef enum
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{ {
return ((offset >= nv->regs->cpu_address) && return ((offset >= nv->regs->cpu_address) &&
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1)))); ((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
} }
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length) static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{ {
return ((nv->fb) && (offset >= nv->fb->cpu_address) && return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1)))); ((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
} }
@ -646,9 +652,6 @@ static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
{ {
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) && return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
(offset >= nv->ud.cpu_address) && (offset >= nv->ud.cpu_address) &&
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1)))); ((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
} }
@ -657,9 +660,6 @@ static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && (nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && (offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + ((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))); (nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
} }
@ -799,7 +799,7 @@ void NV_API_CALL nv_put_firmware(const void *);
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **); nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
void NV_API_CALL nv_put_file_private(void *); void NV_API_CALL nv_put_file_private(void *);
-NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvU32 *, NvS32 *);
+NV_STATUS NV_API_CALL nv_get_device_memory_config(nv_state_t *, NvU64 *, NvU64 *, NvU32 *, NvS32 *);
NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**); NV_STATUS NV_API_CALL nv_get_ibmnpu_genreg_info(nv_state_t *, NvU64 *, NvU64 *, void**);
NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode); NV_STATUS NV_API_CALL nv_get_ibmnpu_relaxed_ordering_mode(nv_state_t *nv, NvBool *mode);
@ -850,11 +850,9 @@ void NV_API_CALL nv_dma_release_dma_buf (void *, nv_dma_buf_t *);
void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *); void NV_API_CALL nv_schedule_uvm_isr (nv_state_t *);
NvBool NV_API_CALL nv_platform_supports_s0ix (void); NvBool NV_API_CALL nv_platform_supports_s0ix (void);
NvBool NV_API_CALL nv_s2idle_pm_configured (void); NvBool NV_API_CALL nv_s2idle_pm_configured (void);
NvBool NV_API_CALL nv_is_chassis_notebook (void); NvBool NV_API_CALL nv_is_chassis_notebook (void);
void NV_API_CALL nv_allow_runtime_suspend (nv_state_t *nv); void NV_API_CALL nv_allow_runtime_suspend (nv_state_t *nv);
void NV_API_CALL nv_disallow_runtime_suspend (nv_state_t *nv); void NV_API_CALL nv_disallow_runtime_suspend (nv_state_t *nv);
@ -864,45 +862,6 @@ typedef void (*nvTegraDceClientIpcCallback)(NvU32, NvU32, NvU32, void *, void *)
NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *); NV_STATUS NV_API_CALL nv_get_num_phys_pages (void *, NvU32 *);
NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *); NV_STATUS NV_API_CALL nv_get_phys_pages (void *, void *, NvU32 *);
/* /*
* --------------------------------------------------------------------------- * ---------------------------------------------------------------------------
* *
@ -1019,7 +978,7 @@ void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *); NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
/* vGPU VFIO specific functions */ /* vGPU VFIO specific functions */
-NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32);
+NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16); NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool); NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8); NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
@ -1045,18 +1004,6 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL; return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
} }
#if defined(NVCPU_X86_64) #if defined(NVCPU_X86_64)
static inline NvU64 nv_rdtsc(void) static inline NvU64 nv_rdtsc(void)

View File

@ -1,5 +1,5 @@
/*
-* SPDX-FileCopyrightText: Copyright (c) 2013-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -201,7 +201,8 @@ void nvUvmInterfaceAddressSpaceDestroy(uvmGpuAddressSpaceHandle vaSpace);
and will return a unique GPU virtual address.
The default page size will be the small page size (as returned by query
-caps). The Alignment will also be enforced to small page size(64K/128K).
+caps). The physical alignment will also be enforced to small page
+size(64K/128K).
Arguments:
vaSpace[IN] - Pointer to vaSpace object
@ -211,15 +212,15 @@
contains below given fields
allocInfo Members:
-rangeBegin[IN] - Allocation will be made between rangeBegin
-rangeEnd[IN] and rangeEnd(both inclusive). Default will be
-no-range limitation.
gpuPhysOffset[OUT] - Physical offset of allocation returned only
if contiguous allocation is requested.
+pageSize[IN] - Override the default page size (see above).
+alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
+alignment.
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
will follow the vidHeapControl default policy.
-bHandleProvided [IN] - Flag to signify that the client has provided
-the handle for phys allocation.
+bMemGrowsDown[IN]
+bPersistentVidmem[IN] - Allocate persistent vidmem.
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
If not provided; allocator will return the handle
it used eventually.
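As a rough sketch of how a caller might drive the reworked allocInfo, assuming the nvUvmInterfaceMemoryAllocFB(vaSpace, length, gpuPointer, allocInfo) declaration in this header; the helper, sizes, and flags below are illustrative only:

/* Hypothetical illustration of a 520-style UvmGpuAllocInfo; unset fields
 * keep their zero defaults. */
static NV_STATUS example_alloc_contig_vidmem(uvmGpuAddressSpaceHandle vaSpace,
                                             UvmGpuPointer *gpuVa)
{
    UvmGpuAllocInfo allocInfo = { 0 };

    allocInfo.pageSize             = 64 * 1024; /* override the default page size */
    allocInfo.alignment            = 0;         /* 0 means 4KB GPU VA alignment   */
    allocInfo.bContiguousPhysAlloc = NV_TRUE;   /* request contiguous vidmem      */

    /* On success, allocInfo.gpuPhysOffset holds the physical offset of the
     * contiguous allocation and *gpuVa the new GPU virtual address. */
    return nvUvmInterfaceMemoryAllocFB(vaSpace, 2 * 1024 * 1024, gpuVa, &allocInfo);
}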
@ -247,7 +248,6 @@ NV_STATUS nvUvmInterfaceMemoryAllocFB(uvmGpuAddressSpaceHandle vaSpace,
and will return a unique GPU virtual address.
The default page size will be the small page size (as returned by query caps)
-The Alignment will also be enforced to small page size.
Arguments:
vaSpace[IN] - Pointer to vaSpace object
@ -257,15 +257,15 @@
contains below given fields
allocInfo Members:
-rangeBegin[IN] - Allocation will be made between rangeBegin
-rangeEnd[IN] and rangeEnd(both inclusive). Default will be
-no-range limitation.
gpuPhysOffset[OUT] - Physical offset of allocation returned only
if contiguous allocation is requested.
+pageSize[IN] - Override the default page size (see above).
+alignment[IN] - gpuPointer GPU VA alignment. 0 means 4KB
+alignment.
bContiguousPhysAlloc[IN] - Flag to request contiguous allocation. Default
will follow the vidHeapControl default policy.
-bHandleProvided [IN] - Flag to signify that the client has provided
-the handle for phys allocation.
+bMemGrowsDown[IN]
+bPersistentVidmem[IN] - Allocate persistent vidmem.
hPhysHandle[IN/OUT] - The handle will be used in allocation if provided.
If not provided; allocator will return the handle
it used eventually.
@ -671,14 +671,16 @@ NV_STATUS nvUvmInterfaceUnsetPageDirectory(uvmGpuAddressSpaceHandle vaSpace);
For duplication of physical memory use nvUvmInterfaceDupMemory.
Arguments:
srcVaSpace[IN] - Source VA space.
srcAddress[IN] - GPU VA in the source VA space. The provided address
should match one previously returned by
nvUvmInterfaceMemoryAllocFB or
nvUvmInterfaceMemoryAllocSys.
dstVaSpace[IN] - Destination VA space where the new mapping will be
created.
+dstVaAlignment[IN] - Alignment of the GPU VA in the destination VA
+space. 0 means 4KB alignment.
dstAddress[OUT] - Pointer to the GPU VA in the destination VA space.
Error codes:
NV_ERR_INVALID_ARGUMENT - If any of the inputs is invalid, or the source
@ -692,6 +694,7 @@
NV_STATUS nvUvmInterfaceDupAllocation(uvmGpuAddressSpaceHandle srcVaSpace,
NvU64 srcAddress,
uvmGpuAddressSpaceHandle dstVaSpace,
+NvU64 dstVaAlignment,
NvU64 *dstAddress);
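A minimal sketch of the updated call using the new dstVaAlignment argument; the wrapper and the 64KB alignment value are illustrative:

/* Hypothetical illustration: duplicate an allocation into another VA space,
 * asking for a 64KB-aligned GPU VA in the destination (0 would mean 4KB). */
static NV_STATUS example_dup_with_alignment(uvmGpuAddressSpaceHandle srcVaSpace,
                                            NvU64 srcAddress,
                                            uvmGpuAddressSpaceHandle dstVaSpace,
                                            NvU64 *dstAddress)
{
    return nvUvmInterfaceDupAllocation(srcVaSpace,
                                       srcAddress,
                                       dstVaSpace,
                                       64 * 1024,   /* dstVaAlignment */
                                       dstAddress);
}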
/******************************************************************************* /*******************************************************************************
@ -1068,10 +1071,6 @@ void nvUvmInterfaceP2pObjectDestroy(uvmGpuSessionHandle session,
NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup NV_ERR_NOT_READY - Returned when querying the PTEs requires a deferred setup
which has not yet completed. It is expected that the caller which has not yet completed. It is expected that the caller
will reattempt the call until a different code is returned. will reattempt the call until a different code is returned.
*/ */
NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace, NV_STATUS nvUvmInterfaceGetExternalAllocPtes(uvmGpuAddressSpaceHandle vaSpace,
NvHandle hMemory, NvHandle hMemory,
@ -1260,7 +1259,7 @@ void nvUvmInterfacePagingChannelDestroy(UvmGpuPagingChannelHandle channel);
device[IN] - device under which paging channels were allocated device[IN] - device under which paging channels were allocated
dstAddress[OUT] - a virtual address that is valid (i.e. is mapped) in dstAddress[OUT] - a virtual address that is valid (i.e. is mapped) in
all the paging channels allocated under the given vaSpace. all the paging channels allocated under the given vaSpace.
Error codes: Error codes:
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed. NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled. NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
@ -1373,7 +1372,7 @@ void nvUvmInterfacePagingChannelsUnmap(uvmGpuAddressSpaceHandle srcVaSpace,
methodStreamSize[IN] - Size of methodStream, in bytes. The maximum push methodStreamSize[IN] - Size of methodStream, in bytes. The maximum push
size is 128KB. size is 128KB.
Error codes: Error codes:
NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed. NV_ERR_INVALID_ARGUMENT - Invalid parameter/s is passed.
NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled. NV_ERR_NOT_SUPPORTED - SR-IOV heavy mode is disabled.
@ -1382,136 +1381,4 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
char *methodStream, char *methodStream,
NvU32 methodStreamSize); NvU32 methodStreamSize);
#endif // _NV_UVM_INTERFACE_H_ #endif // _NV_UVM_INTERFACE_H_

View File

@ -1,5 +1,5 @@
/*
-* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -217,12 +217,6 @@ typedef struct UvmGpuChannelInstanceInfo_tag
// Out: Type of the engine the channel is bound to // Out: Type of the engine the channel is bound to
NvU32 channelEngineType; NvU32 channelEngineType;
// Out: Channel handle required to ring the doorbell
NvU32 workSubmissionToken;
// Out: Address of the doorbell
volatile NvU32 *workSubmissionOffset;
// Out: Channel handle to be used in the CLEAR_FAULTED method // Out: Channel handle to be used in the CLEAR_FAULTED method
NvU32 clearFaultedToken; NvU32 clearFaultedToken;
@ -231,6 +225,10 @@ typedef struct UvmGpuChannelInstanceInfo_tag
// Ampere+ GPUs // Ampere+ GPUs
volatile NvU32 *pChramChannelRegister; volatile NvU32 *pChramChannelRegister;
// Out: Address of the Runlist PRI Base Register required to ring the
// doorbell after clearing the faulted bit.
volatile NvU32 *pRunlistPRIBaseRegister;
// Out: SMC engine id to which the GR channel is bound, or zero if the GPU // Out: SMC engine id to which the GR channel is bound, or zero if the GPU
// does not support SMC or it is a CE channel // does not support SMC or it is a CE channel
NvU32 smcEngineId; NvU32 smcEngineId;
@ -372,10 +370,8 @@ typedef enum
UVM_LINK_TYPE_NVLINK_1, UVM_LINK_TYPE_NVLINK_1,
UVM_LINK_TYPE_NVLINK_2, UVM_LINK_TYPE_NVLINK_2,
UVM_LINK_TYPE_NVLINK_3, UVM_LINK_TYPE_NVLINK_3,
UVM_LINK_TYPE_NVLINK_4,
UVM_LINK_TYPE_C2C,
} UVM_LINK_TYPE; } UVM_LINK_TYPE;
typedef struct UvmGpuCaps_tag typedef struct UvmGpuCaps_tag
@ -433,19 +429,13 @@ typedef struct UvmGpuAddressSpaceInfo_tag
typedef struct UvmGpuAllocInfo_tag
{
-    NvU64    rangeBegin;             // Allocation will be made between
-    NvU64    rangeEnd;               // rangeBegin & rangeEnd both included
    NvU64    gpuPhysOffset;          // Returns gpuPhysOffset if contiguous requested
    NvU32    pageSize;               // default is RM big page size - 64K or 128 K" else use 4K or 2M
-    NvU64    alignment;              // Alignment of allocation
+    NvU64    alignment;              // Virtual alignment
    NvBool   bContiguousPhysAlloc;   // Flag to request contiguous physical allocation
    NvBool   bMemGrowsDown;          // Causes RM to reserve physical heap from top of FB
    NvBool   bPersistentVidmem;      // Causes RM to allocate persistent video memory
    NvHandle hPhysHandle;            // Handle for phys allocation either provided or retrieved
} UvmGpuAllocInfo;
typedef enum typedef enum
@ -576,10 +566,8 @@ typedef struct UvmPlatformInfo_tag
// Out: ATS (Address Translation Services) is supported // Out: ATS (Address Translation Services) is supported
NvBool atsSupported; NvBool atsSupported;
// Out: AMD SEV (Secure Encrypted Virtualization) is enabled
NvBool sevEnabled;
} UvmPlatformInfo; } UvmPlatformInfo;
typedef struct UvmGpuClientInfo_tag typedef struct UvmGpuClientInfo_tag
@ -589,24 +577,6 @@ typedef struct UvmGpuClientInfo_tag
NvHandle hSmcPartRef; NvHandle hSmcPartRef;
} UvmGpuClientInfo; } UvmGpuClientInfo;
#define UVM_GPU_NAME_LENGTH 0x40 #define UVM_GPU_NAME_LENGTH 0x40
typedef struct UvmGpuInfo_tag typedef struct UvmGpuInfo_tag
@ -671,10 +641,6 @@ typedef struct UvmGpuInfo_tag
UvmGpuClientInfo smcUserClientInfo; UvmGpuClientInfo smcUserClientInfo;
} UvmGpuInfo; } UvmGpuInfo;
typedef struct UvmGpuFbInfo_tag typedef struct UvmGpuFbInfo_tag
@ -717,11 +683,6 @@ typedef struct UvmPmaStatistics_tag
volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions
volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions
volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions
} UvmPmaStatistics; } UvmPmaStatistics;
/******************************************************************************* /*******************************************************************************
@ -961,10 +922,4 @@ typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo;
typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams; typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams;
typedef UvmPmaAllocationOptions gpuPmaAllocationOptions; typedef UvmPmaAllocationOptions gpuPmaAllocationOptions;
#endif // _NV_UVM_TYPES_H_ #endif // _NV_UVM_TYPES_H_

View File

@ -416,6 +416,12 @@ struct NvKmsKapiCreateSurfaceParams {
NvU8 log2GobsPerBlockY; NvU8 log2GobsPerBlockY;
}; };
enum NvKmsKapiAllocationType {
NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT = 0,
NVKMS_KAPI_ALLOCATION_TYPE_NOTIFIER = 1,
NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN = 2,
};
struct NvKmsKapiFunctionsTable { struct NvKmsKapiFunctionsTable {
/*! /*!
@ -609,6 +615,8 @@ struct NvKmsKapiFunctionsTable {
* \param [in] device A device allocated using allocateDevice(). * \param [in] device A device allocated using allocateDevice().
* *
* \param [in] layout BlockLinear or Pitch. * \param [in] layout BlockLinear or Pitch.
*
* \param [in] type Allocation type.
* *
* \param [in] size Size, in bytes, of the memory to allocate. * \param [in] size Size, in bytes, of the memory to allocate.
* *
@ -624,6 +632,7 @@ struct NvKmsKapiFunctionsTable {
( (
struct NvKmsKapiDevice *device, struct NvKmsKapiDevice *device,
enum NvKmsSurfaceMemoryLayout layout, enum NvKmsSurfaceMemoryLayout layout,
enum NvKmsKapiAllocationType type,
NvU64 size, NvU64 size,
NvU8 *compressible NvU8 *compressible
); );
@ -637,6 +646,8 @@ struct NvKmsKapiFunctionsTable {
* \param [in] device A device allocated using allocateDevice(). * \param [in] device A device allocated using allocateDevice().
* *
* \param [in] layout BlockLinear or Pitch. * \param [in] layout BlockLinear or Pitch.
*
* \param [in] type Allocation type.
* *
* \param [in] size Size, in bytes, of the memory to allocate. * \param [in] size Size, in bytes, of the memory to allocate.
* *
@ -652,6 +663,7 @@ struct NvKmsKapiFunctionsTable {
( (
struct NvKmsKapiDevice *device, struct NvKmsKapiDevice *device,
enum NvKmsSurfaceMemoryLayout layout, enum NvKmsSurfaceMemoryLayout layout,
enum NvKmsKapiAllocationType type,
NvU64 size, NvU64 size,
NvU8 *compressible NvU8 *compressible
); );

View File

@ -31,13 +31,6 @@
/* /*
* This is the maximum number of GPUs supported in a single system. * This is the maximum number of GPUs supported in a single system.
*/ */

View File

@ -125,6 +125,7 @@ NvU32 NV_API_CALL os_get_cpu_number (void);
void NV_API_CALL os_disable_console_access (void); void NV_API_CALL os_disable_console_access (void);
void NV_API_CALL os_enable_console_access (void); void NV_API_CALL os_enable_console_access (void);
NV_STATUS NV_API_CALL os_registry_init (void); NV_STATUS NV_API_CALL os_registry_init (void);
NvU64 NV_API_CALL os_get_max_user_va (void);
NV_STATUS NV_API_CALL os_schedule (void); NV_STATUS NV_API_CALL os_schedule (void);
NV_STATUS NV_API_CALL os_alloc_spinlock (void **); NV_STATUS NV_API_CALL os_alloc_spinlock (void **);
void NV_API_CALL os_free_spinlock (void *); void NV_API_CALL os_free_spinlock (void *);
@ -193,19 +194,12 @@ void NV_API_CALL os_nv_cap_destroy_entry (nv_cap_t *);
int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int); int NV_API_CALL os_nv_cap_validate_and_dup_fd(const nv_cap_t *, int);
void NV_API_CALL os_nv_cap_close_fd (int); void NV_API_CALL os_nv_cap_close_fd (int);
enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_32BIT,
OS_INTF_PCIE_REQ_ATOMICS_64BIT,
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
extern NvU32 os_page_size; extern NvU32 os_page_size;
extern NvU64 os_page_mask; extern NvU64 os_page_mask;
@ -245,11 +239,4 @@ int NV_API_CALL nv_printf(NvU32 debuglevel, const char *printf_format, ...);
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_NO 0x00000000 #define NV_LOCK_USER_PAGES_FLAGS_WRITE_NO 0x00000000
#define NV_LOCK_USER_PAGES_FLAGS_WRITE_YES 0x00000001 #define NV_LOCK_USER_PAGES_FLAGS_WRITE_YES 0x00000001
#endif /* OS_INTERFACE_H */ #endif /* OS_INTERFACE_H */

View File

@ -1,5 +1,5 @@
/*
-* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -63,7 +63,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_query_caps(nvidia_stack_t *, nvgpuDeviceHandle
NV_STATUS NV_API_CALL rm_gpu_ops_query_ces_caps(nvidia_stack_t *sp, nvgpuDeviceHandle_t, nvgpuCesCaps_t); NV_STATUS NV_API_CALL rm_gpu_ops_query_ces_caps(nvidia_stack_t *sp, nvgpuDeviceHandle_t, nvgpuCesCaps_t);
NV_STATUS NV_API_CALL rm_gpu_ops_get_gpu_info(nvidia_stack_t *, const NvProcessorUuid *pUuid, const nvgpuClientInfo_t *, nvgpuInfo_t *); NV_STATUS NV_API_CALL rm_gpu_ops_get_gpu_info(nvidia_stack_t *, const NvProcessorUuid *pUuid, const nvgpuClientInfo_t *, nvgpuInfo_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_service_device_interrupts_rm(nvidia_stack_t *, nvgpuDeviceHandle_t); NV_STATUS NV_API_CALL rm_gpu_ops_service_device_interrupts_rm(nvidia_stack_t *, nvgpuDeviceHandle_t);
-NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64 *);
+NV_STATUS NV_API_CALL rm_gpu_ops_dup_allocation(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuAddressSpaceHandle_t, NvU64, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_dup_memory (nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, NvHandle *, nvgpuMemoryInfo_t); NV_STATUS NV_API_CALL rm_gpu_ops_dup_memory (nvidia_stack_t *, nvgpuDeviceHandle_t, NvHandle, NvHandle, NvHandle *, nvgpuMemoryInfo_t);
@ -98,13 +98,4 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channels_map(nvidia_stack_t *, nvgpuAdd
void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t); void NV_API_CALL rm_gpu_ops_paging_channels_unmap(nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, nvgpuDeviceHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32); NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, nvgpuPagingChannelHandle_t, char *, NvU32);
#endif #endif

View File

@ -2357,10 +2357,41 @@ compile_test() {
# linux-4.4.168 cherry-picked commit 768ae309a961 without # linux-4.4.168 cherry-picked commit 768ae309a961 without
# c12d2da56d0e which is covered in Conftest #3. # c12d2da56d0e which is covered in Conftest #3.
# #
#
# This function sets the NV_GET_USER_PAGES_* macros as per the below
# passing conftests
#
set_get_user_pages_defines () {
if [ "$1" = "NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE" ]; then
echo "#define NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE" ]; then
echo "#define NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS" ]; then
echo "#define NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_HAS_ARGS_FLAGS" ]; then
echo "#define NV_GET_USER_PAGES_HAS_ARGS_FLAGS" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_HAS_ARGS_FLAGS" | append_conftest "functions"
fi
}
# Conftest #1: Check if get_user_pages accepts 6 arguments. # Conftest #1: Check if get_user_pages accepts 6 arguments.
# Return if true. # Return if true.
# Fall through to conftest #2 on failure. # Fall through to conftest #2 on failure.
#
echo "$CONFTEST_PREAMBLE echo "$CONFTEST_PREAMBLE
#include <linux/mm.h> #include <linux/mm.h>
long get_user_pages(unsigned long start, long get_user_pages(unsigned long start,
@ -2375,8 +2406,7 @@ compile_test() {
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1 $CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#define NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions" set_get_user_pages_defines "NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE"
echo "#undef NV_GET_USER_PAGES_HAS_TASK_STRUCT" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
@ -2401,8 +2431,7 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#undef NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions" set_get_user_pages_defines "NV_GET_USER_PAGES_HAS_ARGS_FLAGS"
echo "#undef NV_GET_USER_PAGES_HAS_TASK_STRUCT" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
@ -2429,14 +2458,12 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#undef NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions" set_get_user_pages_defines "NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS"
echo "#define NV_GET_USER_PAGES_HAS_TASK_STRUCT" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
echo "#define NV_GET_USER_PAGES_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions" set_get_user_pages_defines "NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE"
echo "#define NV_GET_USER_PAGES_HAS_TASK_STRUCT" | append_conftest "functions"
return return
;; ;;
@ -2463,10 +2490,47 @@ compile_test() {
# commit 64019a2e467a ("mm/gup: remove task_struct pointer for # commit 64019a2e467a ("mm/gup: remove task_struct pointer for
# all gup code") in v5.9-rc1 (2020-08-11). # all gup code") in v5.9-rc1 (2020-08-11).
# #
#
# This function sets the NV_GET_USER_PAGES_REMOTE_* macros as per
# the below passing conftests
#
set_get_user_pages_remote_defines () {
if [ "$1" = "" ]; then
echo "#undef NV_GET_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
else
echo "#define NV_GET_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE" ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS" ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED" ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED" | append_conftest "functions"
fi
if [ "$1" = "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED" ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED" | append_conftest "functions"
else
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED" | append_conftest "functions"
fi
}
# conftest #1: check if get_user_pages_remote() is available # conftest #1: check if get_user_pages_remote() is available
# return if not available. # return if not available.
# Fall through to conftest #2 if it is present # Fall through to conftest #2 if it is present
#
echo "$CONFTEST_PREAMBLE echo "$CONFTEST_PREAMBLE
#include <linux/mm.h> #include <linux/mm.h>
void conftest_get_user_pages_remote(void) { void conftest_get_user_pages_remote(void) {
@ -2477,10 +2541,7 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#undef NV_GET_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions" set_get_user_pages_remote_defines ""
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG" | append_conftest "functions"
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions"
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
@ -2490,7 +2551,6 @@ compile_test() {
# force arguments. Return if these arguments are present # force arguments. Return if these arguments are present
# Fall through to conftest #3 if these args are absent. # Fall through to conftest #3 if these args are absent.
# #
echo "#define NV_GET_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
echo "$CONFTEST_PREAMBLE echo "$CONFTEST_PREAMBLE
#include <linux/mm.h> #include <linux/mm.h>
long get_user_pages_remote(struct task_struct *tsk, long get_user_pages_remote(struct task_struct *tsk,
@ -2508,18 +2568,40 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG" | append_conftest "functions" set_get_user_pages_remote_defines "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE"
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions"
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_WRITE_AND_FORCE_ARGS" | append_conftest "functions" #
# conftest #3: check if get_user_pages_remote() has gpu_flags
# arguments. Return if these arguments are present
# Fall through to conftest #4 if these args are absent.
#
echo "$CONFTEST_PREAMBLE
#include <linux/mm.h>
long get_user_pages_remote(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
unsigned int gpu_flags,
struct page **pages,
struct vm_area_struct **vmas) {
return 0;
}" > conftest$$.c
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
rm -f conftest$$.c
if [ -f conftest$$.o ]; then
set_get_user_pages_remote_defines "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS"
rm -f conftest$$.o
return
fi
# #
-# conftest #3: check if get_user_pages_remote() has locked argument
+# conftest #4: check if get_user_pages_remote() has locked argument
-# Return if these arguments are present. Fall through to conftest #4
+# Return if these arguments are present. Fall through to conftest #5
# if these args are absent. # if these args are absent.
# #
echo "$CONFTEST_PREAMBLE echo "$CONFTEST_PREAMBLE
@ -2539,14 +2621,13 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG" | append_conftest "functions" set_get_user_pages_remote_defines "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED"
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
return return
fi fi
# #
-# conftest #4: check if get_user_pages_remote() does not take
+# conftest #5: check if get_user_pages_remote() does not take
# tsk argument. # tsk argument.
# #
echo "$CONFTEST_PREAMBLE echo "$CONFTEST_PREAMBLE
@ -2565,13 +2646,8 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
if [ -f conftest$$.o ]; then if [ -f conftest$$.o ]; then
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG" | append_conftest "functions" set_get_user_pages_remote_defines "NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED"
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG" | append_conftest "functions"
rm -f conftest$$.o rm -f conftest$$.o
else
echo "#define NV_GET_USER_PAGES_REMOTE_HAS_TSK_ARG" | append_conftest "functions"
echo "#undef NV_GET_USER_PAGES_REMOTE_HAS_LOCKED_ARG" | append_conftest "functions"
fi fi
;; ;;
@ -5185,6 +5261,22 @@ compile_test() {
rm -f conftest$$.c rm -f conftest$$.c
;; ;;
platform_irq_count)
#
# Determine if the platform_irq_count() function is present
#
# platform_irq_count was added by commit
# 4b83555d5098e73cf2c5ca7f86c17ca0ba3b968e ("driver-core: platform: Add platform_irq_count()")
# in 4.5-rc1 (2016-01-07)
#
CODE="
#include <linux/platform_device.h>
int conftest_platform_irq_count(void) {
return platform_irq_count();
}"
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
;;
dma_resv_add_fence) dma_resv_add_fence)
# #
# Determine if the dma_resv_add_fence() function is present. # Determine if the dma_resv_add_fence() function is present.

View File

@ -0,0 +1,25 @@
count:
@echo "conftests:$(words $(ALL_CONFTESTS))" \
"objects:$(words $(NV_OBJECTS_DEPEND_ON_CONFTEST))" \
"modules:$(words $(NV_KERNEL_MODULES))"
.PHONY: count
# Include the top-level makefile to get $(NV_KERNEL_MODULES)
include Makefile
# Set $(src) for the to-be-included nvidia*.Kbuild files
src := $(CURDIR)
# Include nvidia*.Kbuild and append the nvidia*-y objects to ALL_OBJECTS
$(foreach _module, $(NV_KERNEL_MODULES), \
$(eval include $(_module)/$(_module).Kbuild) \
)
# Concatenate all of the conftest lists; use $(sort ) to remove duplicates
ALL_CONFTESTS := $(sort $(NV_CONFTEST_FUNCTION_COMPILE_TESTS) \
$(NV_CONFTEST_GENERIC_COMPILE_TESTS) \
$(NV_CONFTEST_MACRO_COMPILE_TESTS) \
$(NV_CONFTEST_SYMBOL_COMPILE_TESTS) \
$(NV_CONFTEST_TYPE_COMPILE_TESTS) \
)

View File

@ -285,11 +285,13 @@ int nv_drm_dumb_create(
if (nv_dev->hasVideoMemory) {
    pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
                                         NvKmsSurfaceMemoryLayoutPitch,
+                                        NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                         args->size,
                                         &compressible);
} else {
    pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
                                          NvKmsSurfaceMemoryLayoutPitch,
+                                         NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT,
                                          args->size,
                                          &compressible);
}
@ -441,6 +443,7 @@ int nv_drm_gem_alloc_nvkms_memory_ioctl(struct drm_device *dev,
struct nv_drm_gem_nvkms_memory *nv_nvkms_memory = NULL;
struct NvKmsKapiMemory *pMemory;
enum NvKmsSurfaceMemoryLayout layout;
+enum NvKmsKapiAllocationType type;
int ret = 0;
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
@ -449,6 +452,7 @@
}
if (p->__pad != 0) {
+    ret = -EINVAL;
    NV_DRM_DEV_LOG_ERR(nv_dev, "non-zero value in padding field");
    goto failed;
}
@ -461,15 +465,19 @@
layout = p->block_linear ?
    NvKmsSurfaceMemoryLayoutBlockLinear : NvKmsSurfaceMemoryLayoutPitch;
+type = (p->flags & NV_GEM_ALLOC_NO_SCANOUT) ?
+    NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN : NVKMS_KAPI_ALLOCATION_TYPE_SCANOUT;
if (nv_dev->hasVideoMemory) {
    pMemory = nvKms->allocateVideoMemory(nv_dev->pDevice,
                                         layout,
+                                        type,
                                         p->memory_size,
                                         &p->compressible);
} else {
    pMemory = nvKms->allocateSystemMemory(nv_dev->pDevice,
                                          layout,
+                                         type,
                                          p->memory_size,
                                          &p->compressible);
}

View File

@ -199,6 +199,8 @@ struct drm_nvidia_gem_map_offset_params {
    uint64_t offset;            /* OUT Fake offset */
};

#define NV_GEM_ALLOC_NO_SCANOUT (1 << 0)

struct drm_nvidia_gem_alloc_nvkms_memory_params {
    uint32_t handle;            /* OUT */
    uint8_t  block_linear;      /* IN */
@ -206,6 +208,7 @@ struct drm_nvidia_gem_alloc_nvkms_memory_params {
    uint16_t __pad;

    uint64_t memory_size;       /* IN */
    uint32_t flags;             /* IN */
};

struct drm_nvidia_gem_export_dmabuf_memory_params {
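Taken together with the DRM changes above, the new flags field lets userspace request an allocation that will never be scanned out, in which case the kernel side picks NVKMS_KAPI_ALLOCATION_TYPE_OFFSCREEN. A rough userspace sketch follows; the ioctl request macro name is assumed from the driver's existing DRM_IOCTL_NVIDIA_* naming convention and is not shown in this hunk.

#include <stdint.h>
#include <xf86drm.h>              /* drmIoctl(), from libdrm */
#include "nvidia-drm-ioctl.h"     /* params struct and NV_GEM_ALLOC_NO_SCANOUT */

/* Hedged sketch: allocate NVKMS-backed memory that will never be scanned out. */
static int alloc_offscreen(int fd, uint64_t size, uint32_t *handle)
{
    struct drm_nvidia_gem_alloc_nvkms_memory_params p = { 0 };

    p.memory_size  = size;
    p.block_linear = 0;                        /* pitch layout */
    p.flags        = NV_GEM_ALLOC_NO_SCANOUT;  /* maps to ..._TYPE_OFFSCREEN */

    /* DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY is assumed here, not shown above. */
    if (drmIoctl(fd, DRM_IOCTL_NVIDIA_GEM_ALLOC_NVKMS_MEMORY, &p) != 0)
        return -1;

    *handle = p.handle;
    return 0;
}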

View File

@ -175,11 +175,7 @@ module_init(nv_linux_drm_init);
module_exit(nv_linux_drm_exit);

#if defined(MODULE_LICENSE)
MODULE_LICENSE("Dual MIT/GPL");
#endif
#if defined(MODULE_INFO)
MODULE_INFO(supported, "external");

View File

@ -59,7 +59,7 @@
#define NVKMS_LOG_PREFIX "nvidia-modeset: "

static bool output_rounding_fix = false;
static bool output_rounding_fix = true;
module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);

/* These parameters are used for fault injection tests. Normally the defaults
@ -85,110 +85,6 @@ NvBool nvkms_output_rounding_fix(void)
 * NVKMS interface for nvhost unit for sync point APIs.
 *************************************************************************/

#ifdef NVKMS_SYNCPT_STUBS_NEEDED
/* Unsupported STUB for nvkms_syncpt APIs */
NvBool nvkms_syncpt_op(
@ -1735,11 +1631,7 @@ module_init(nvkms_init);
module_exit(nvkms_exit);

#if defined(MODULE_LICENSE)
MODULE_LICENSE("Dual MIT/GPL");
#endif
#if defined(MODULE_INFO)
MODULE_INFO(supported, "external");

View File

@ -177,11 +177,7 @@ struct nvidia_p2p_page_table {
 * This means the pages underlying the range of GPU virtual memory
 * will persist until explicitly freed by nvidia_p2p_put_pages().
* Persistent GPU memory mappings are not supported on PowerPC, * Persistent GPU memory mappings are not supported on PowerPC,
* MIG-enabled devices and vGPU. * MIG-enabled devices and vGPU.
* @param[in] data * @param[in] data
* A non-NULL opaque pointer to private data to be passed to the * A non-NULL opaque pointer to private data to be passed to the
* callback function. * callback function.

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -116,6 +116,14 @@ extern "C" {
#define NVA16F_GP_ENTRY1_LEVEL_MAIN                                0x00000000
#define NVA16F_GP_ENTRY1_LEVEL_SUBROUTINE                          0x00000001
#define NVA16F_GP_ENTRY1_LENGTH                                    30:10
#define NVA16F_GP_ENTRY1_SYNC 31:31
#define NVA16F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVA16F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVA16F_GP_ENTRY1_OPCODE 7:0
#define NVA16F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVA16F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVA16F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVA16F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
/* dma method formats */
#define NVA16F_DMA_METHOD_ADDRESS                                  11:0
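A note on reading these class headers: each field is written as a "high:low" bit range within a 32-bit word, so NVA16F_GP_ENTRY1_LENGTH occupies bits 30:10 of the second GPFIFO entry word and NVA16F_GP_ENTRY1_SYNC is bit 31. Below is a minimal sketch of how such ranges turn into masks and shifts; the driver itself uses its own DRF-style macros for this, and the helpers here are only illustrative.

#include "nvtypes.h"

/* Illustrative helpers for the "hi:lo" bit-range notation; hi and lo are bit
 * positions within a 32-bit word, with hi >= lo. */
#define FIELD_MASK(hi, lo)    ((0xFFFFFFFFU >> (31 - (hi) + (lo))) << (lo))
#define FIELD_SET(hi, lo, v)  (((NvU32)(v) << (lo)) & FIELD_MASK(hi, lo))
#define FIELD_GET(hi, lo, w)  (((NvU32)(w) & FIELD_MASK(hi, lo)) >> (lo))

/* Example: place a pushbuffer length (in methods) into GP_ENTRY1 bits 30:10
 * and request SYNC_WAIT in bit 31, per the definitions above. */
static NvU32 gp_entry1_example(NvU32 length)
{
    return FIELD_SET(30, 10, length) | FIELD_SET(31, 31, 1 /* SYNC_WAIT */);
}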

View File

@ -0,0 +1,375 @@
/*******************************************************************************
Copyright (c) 2012-2015 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef _clc86f_h_
#define _clc86f_h_
#ifdef __cplusplus
extern "C" {
#endif
#include "nvtypes.h"
/* class HOPPER_CHANNEL_GPFIFO */
/*
* Documentation for HOPPER_CHANNEL_GPFIFO can be found in dev_pbdma.ref,
* chapter "User Control Registers". It is documented as device NV_UDMA.
* The GPFIFO format itself is also documented in dev_pbdma.ref,
* NV_PPBDMA_GP_ENTRY_*. The pushbuffer format is documented in dev_ram.ref,
* chapter "FIFO DMA RAM", NV_FIFO_DMA_*.
*
* Note there is no .mfs file for this class.
*/
#define HOPPER_CHANNEL_GPFIFO_A (0x0000C86F)
#define NVC86F_TYPEDEF HOPPER_CHANNELChannelGPFifoA
/* dma flow control data structure */
typedef volatile struct Nvc86fControl_struct {
NvU32 Ignored00[0x010]; /* 0000-003f*/
NvU32 Put; /* put offset, read/write 0040-0043*/
NvU32 Get; /* get offset, read only 0044-0047*/
NvU32 Reference; /* reference value, read only 0048-004b*/
NvU32 PutHi; /* high order put offset bits 004c-004f*/
NvU32 Ignored01[0x002]; /* 0050-0057*/
NvU32 TopLevelGet; /* top level get offset, read only 0058-005b*/
NvU32 TopLevelGetHi; /* high order top level get bits 005c-005f*/
NvU32 GetHi; /* high order get offset bits 0060-0063*/
NvU32 Ignored02[0x007]; /* 0064-007f*/
NvU32 Ignored03; /* used to be engine yield 0080-0083*/
NvU32 Ignored04[0x001]; /* 0084-0087*/
NvU32 GPGet; /* GP FIFO get offset, read only 0088-008b*/
NvU32 GPPut; /* GP FIFO put offset 008c-008f*/
NvU32 Ignored05[0x5c];
} Nvc86fControl, HopperAControlGPFifo;
/* fields and values */
#define NVC86F_NUMBER_OF_SUBCHANNELS (8)
#define NVC86F_SET_OBJECT (0x00000000)
#define NVC86F_SET_OBJECT_NVCLASS 15:0
#define NVC86F_SET_OBJECT_ENGINE 20:16
#define NVC86F_SET_OBJECT_ENGINE_SW 0x0000001f
#define NVC86F_ILLEGAL (0x00000004)
#define NVC86F_ILLEGAL_HANDLE 31:0
#define NVC86F_NOP (0x00000008)
#define NVC86F_NOP_HANDLE 31:0
#define NVC86F_SEMAPHOREA (0x00000010)
#define NVC86F_SEMAPHOREA_OFFSET_UPPER 7:0
#define NVC86F_SEMAPHOREB (0x00000014)
#define NVC86F_SEMAPHOREB_OFFSET_LOWER 31:2
#define NVC86F_SEMAPHOREC (0x00000018)
#define NVC86F_SEMAPHOREC_PAYLOAD 31:0
#define NVC86F_SEMAPHORED (0x0000001C)
#define NVC86F_SEMAPHORED_OPERATION 4:0
#define NVC86F_SEMAPHORED_OPERATION_ACQUIRE 0x00000001
#define NVC86F_SEMAPHORED_OPERATION_RELEASE 0x00000002
#define NVC86F_SEMAPHORED_OPERATION_ACQ_GEQ 0x00000004
#define NVC86F_SEMAPHORED_OPERATION_ACQ_AND 0x00000008
#define NVC86F_SEMAPHORED_OPERATION_REDUCTION 0x00000010
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH 12:12
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_DISABLED 0x00000000
#define NVC86F_SEMAPHORED_ACQUIRE_SWITCH_ENABLED 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_WFI 20:20
#define NVC86F_SEMAPHORED_RELEASE_WFI_EN 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_WFI_DIS 0x00000001
#define NVC86F_SEMAPHORED_RELEASE_SIZE 24:24
#define NVC86F_SEMAPHORED_RELEASE_SIZE_16BYTE 0x00000000
#define NVC86F_SEMAPHORED_RELEASE_SIZE_4BYTE 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION 30:27
#define NVC86F_SEMAPHORED_REDUCTION_MIN 0x00000000
#define NVC86F_SEMAPHORED_REDUCTION_MAX 0x00000001
#define NVC86F_SEMAPHORED_REDUCTION_XOR 0x00000002
#define NVC86F_SEMAPHORED_REDUCTION_AND 0x00000003
#define NVC86F_SEMAPHORED_REDUCTION_OR 0x00000004
#define NVC86F_SEMAPHORED_REDUCTION_ADD 0x00000005
#define NVC86F_SEMAPHORED_REDUCTION_INC 0x00000006
#define NVC86F_SEMAPHORED_REDUCTION_DEC 0x00000007
#define NVC86F_SEMAPHORED_FORMAT 31:31
#define NVC86F_SEMAPHORED_FORMAT_SIGNED 0x00000000
#define NVC86F_SEMAPHORED_FORMAT_UNSIGNED 0x00000001
#define NVC86F_NON_STALL_INTERRUPT (0x00000020)
#define NVC86F_NON_STALL_INTERRUPT_HANDLE 31:0
#define NVC86F_FB_FLUSH (0x00000024) // Deprecated - use MEMBAR TYPE SYS_MEMBAR
#define NVC86F_FB_FLUSH_HANDLE 31:0
// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for
// specifying the page address for a targeted TLB invalidate and the uTLB for
// a targeted REPLAY_CANCEL for UVM.
// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly
// rearranged fields.
#define NVC86F_MEM_OP_A (0x00000028)
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE 7:6 // only relevant for invalidates with NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE for invalidating link TLB only, or non-link TLB only or all TLBs
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_ALL_TLBS 0
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_LINK_TLBS 1
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_NON_LINK_TLBS 2
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_INVAL_SCOPE_RSVRVD 3
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 8:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000
#define NVC86F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12
#define NVC86F_MEM_OP_B (0x0000002c)
#define NVC86F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0
#define NVC86F_MEM_OP_C (0x00000030)
#define NVC86F_MEM_OP_C_MEMBAR_TYPE 2:0
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000
#define NVC86F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003
#define NVC86F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE
#define NVC86F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0
// MEM_OP_D MUST be preceded by MEM_OPs A-C.
#define NVC86F_MEM_OP_D (0x00000034)
#define NVC86F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE
#define NVC86F_MEM_OP_D_OPERATION 31:27
#define NVC86F_MEM_OP_D_OPERATION_MEMBAR 0x00000005
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009
#define NVC86F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a
#define NVC86F_MEM_OP_D_OPERATION_MMU_OPERATION 0x0000000b
#define NVC86F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d
#define NVC86F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e
// CLEAN_LINES is an alias for Tegra/GPU IP usage
#define NVC86F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e
#define NVC86F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f
#define NVC86F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010
#define NVC86F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015
#define NVC86F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001
#define NVC86F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE 23:20
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_RESERVED 0x00000000
#define NVC86F_MEM_OP_D_MMU_OPERATION_TYPE_VIDMEM_ACCESS_BIT_DUMP 0x00000001
#define NVC86F_SET_REFERENCE (0x00000050)
#define NVC86F_SET_REFERENCE_COUNT 31:0
#define NVC86F_SEM_ADDR_LO (0x0000005c)
#define NVC86F_SEM_ADDR_LO_OFFSET 31:2
#define NVC86F_SEM_ADDR_HI (0x00000060)
#define NVC86F_SEM_ADDR_HI_OFFSET 24:0
#define NVC86F_SEM_PAYLOAD_LO (0x00000064)
#define NVC86F_SEM_PAYLOAD_LO_PAYLOAD 31:0
#define NVC86F_SEM_PAYLOAD_HI (0x00000068)
#define NVC86F_SEM_PAYLOAD_HI_PAYLOAD 31:0
#define NVC86F_SEM_EXECUTE (0x0000006c)
#define NVC86F_SEM_EXECUTE_OPERATION 2:0
#define NVC86F_SEM_EXECUTE_OPERATION_ACQUIRE 0x00000000
#define NVC86F_SEM_EXECUTE_OPERATION_RELEASE 0x00000001
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_STRICT_GEQ 0x00000002
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_CIRC_GEQ 0x00000003
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_AND 0x00000004
#define NVC86F_SEM_EXECUTE_OPERATION_ACQ_NOR 0x00000005
#define NVC86F_SEM_EXECUTE_OPERATION_REDUCTION 0x00000006
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG 12:12
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_ACQUIRE_SWITCH_TSG_EN 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_WFI 20:20
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_WFI_EN 0x00000001
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE 24:24
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_32BIT 0x00000000
#define NVC86F_SEM_EXECUTE_PAYLOAD_SIZE_64BIT 0x00000001
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP 25:25
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_DIS 0x00000000
#define NVC86F_SEM_EXECUTE_RELEASE_TIMESTAMP_EN 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION 30:27
#define NVC86F_SEM_EXECUTE_REDUCTION_IMIN 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_IMAX 0x00000001
#define NVC86F_SEM_EXECUTE_REDUCTION_IXOR 0x00000002
#define NVC86F_SEM_EXECUTE_REDUCTION_IAND 0x00000003
#define NVC86F_SEM_EXECUTE_REDUCTION_IOR 0x00000004
#define NVC86F_SEM_EXECUTE_REDUCTION_IADD 0x00000005
#define NVC86F_SEM_EXECUTE_REDUCTION_INC 0x00000006
#define NVC86F_SEM_EXECUTE_REDUCTION_DEC 0x00000007
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT 31:31
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_SIGNED 0x00000000
#define NVC86F_SEM_EXECUTE_REDUCTION_FORMAT_UNSIGNED 0x00000001
#define NVC86F_WFI (0x00000078)
#define NVC86F_WFI_SCOPE 0:0
#define NVC86F_WFI_SCOPE_CURRENT_SCG_TYPE 0x00000000
#define NVC86F_WFI_SCOPE_CURRENT_VEID 0x00000000
#define NVC86F_WFI_SCOPE_ALL 0x00000001
#define NVC86F_YIELD (0x00000080)
#define NVC86F_YIELD_OP 1:0
#define NVC86F_YIELD_OP_NOP 0x00000000
#define NVC86F_YIELD_OP_TSG 0x00000003
#define NVC86F_CLEAR_FAULTED (0x00000084)
// Note: RM provides the HANDLE as an opaque value; the internal detail fields
// are intentionally not exposed to the driver through these defines.
#define NVC86F_CLEAR_FAULTED_HANDLE 30:0
#define NVC86F_CLEAR_FAULTED_TYPE 31:31
#define NVC86F_CLEAR_FAULTED_TYPE_PBDMA_FAULTED 0x00000000
#define NVC86F_CLEAR_FAULTED_TYPE_ENG_FAULTED 0x00000001
#define NVC86F_QUADRO_VERIFY (0x000000a0)
/* GPFIFO entry format */
#define NVC86F_GP_ENTRY__SIZE 8
#define NVC86F_GP_ENTRY0_FETCH 0:0
#define NVC86F_GP_ENTRY0_FETCH_UNCONDITIONAL 0x00000000
#define NVC86F_GP_ENTRY0_FETCH_CONDITIONAL 0x00000001
#define NVC86F_GP_ENTRY0_GET 31:2
#define NVC86F_GP_ENTRY0_OPERAND 31:0
#define NVC86F_GP_ENTRY0_PB_EXTENDED_BASE_OPERAND 24:8
#define NVC86F_GP_ENTRY1_GET_HI 7:0
#define NVC86F_GP_ENTRY1_LEVEL 9:9
#define NVC86F_GP_ENTRY1_LEVEL_MAIN 0x00000000
#define NVC86F_GP_ENTRY1_LEVEL_SUBROUTINE 0x00000001
#define NVC86F_GP_ENTRY1_LENGTH 30:10
#define NVC86F_GP_ENTRY1_SYNC 31:31
#define NVC86F_GP_ENTRY1_SYNC_PROCEED 0x00000000
#define NVC86F_GP_ENTRY1_SYNC_WAIT 0x00000001
#define NVC86F_GP_ENTRY1_OPCODE 7:0
#define NVC86F_GP_ENTRY1_OPCODE_NOP 0x00000000
#define NVC86F_GP_ENTRY1_OPCODE_ILLEGAL 0x00000001
#define NVC86F_GP_ENTRY1_OPCODE_GP_CRC 0x00000002
#define NVC86F_GP_ENTRY1_OPCODE_PB_CRC 0x00000003
#define NVC86F_GP_ENTRY1_OPCODE_SET_PB_SEGMENT_EXTENDED_BASE 0x00000004
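As a worked example of the GPFIFO entry format above, here is a sketch only, assuming the caller already owns a mapped GPFIFO ring and the channel's Nvc86fControl (USERD) page: each entry is NVC86F_GP_ENTRY__SIZE (8) bytes, the low word carries pushbuffer VA bits 31:2 in GP_ENTRY0_GET, the high word carries GET_HI (bits 7:0) and the method count in LENGTH (bits 30:10), and the entry is published by advancing GPPut. Memory barriers, ring-full checks and doorbell writes are intentionally omitted.

#include "nvtypes.h"

/* Illustrative sketch: queue one pushbuffer segment on a Hopper GPFIFO channel. */
static void submit_pushbuf(HopperAControlGPFifo *ctrl, NvU32 *gpfifo,
                           NvU32 gpfifo_entries, NvU64 pushbuf_va,
                           NvU32 method_count)
{
    NvU32 put = ctrl->GPPut;

    gpfifo[put * 2 + 0] = (NvU32)(pushbuf_va & 0xFFFFFFFCULL);     /* GP_ENTRY0_GET    */
    gpfifo[put * 2 + 1] = ((NvU32)(pushbuf_va >> 32) & 0xFF)       /* GP_ENTRY1_GET_HI */
                        | ((method_count & 0x1FFFFF) << 10);       /* GP_ENTRY1_LENGTH */

    ctrl->GPPut = (put + 1) % gpfifo_entries;                      /* publish entry    */
}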
/* dma method formats */
#define NVC86F_DMA_METHOD_ADDRESS_OLD 12:2
#define NVC86F_DMA_METHOD_ADDRESS 11:0
#define NVC86F_DMA_SUBDEVICE_MASK 15:4
#define NVC86F_DMA_METHOD_SUBCHANNEL 15:13
#define NVC86F_DMA_TERT_OP 17:16
#define NVC86F_DMA_TERT_OP_GRP0_INC_METHOD (0x00000000)
#define NVC86F_DMA_TERT_OP_GRP0_SET_SUB_DEV_MASK (0x00000001)
#define NVC86F_DMA_TERT_OP_GRP0_STORE_SUB_DEV_MASK (0x00000002)
#define NVC86F_DMA_TERT_OP_GRP0_USE_SUB_DEV_MASK (0x00000003)
#define NVC86F_DMA_TERT_OP_GRP2_NON_INC_METHOD (0x00000000)
#define NVC86F_DMA_METHOD_COUNT_OLD 28:18
#define NVC86F_DMA_METHOD_COUNT 28:16
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_SEC_OP 31:29
#define NVC86F_DMA_SEC_OP_GRP0_USE_TERT (0x00000000)
#define NVC86F_DMA_SEC_OP_INC_METHOD (0x00000001)
#define NVC86F_DMA_SEC_OP_GRP2_USE_TERT (0x00000002)
#define NVC86F_DMA_SEC_OP_NON_INC_METHOD (0x00000003)
#define NVC86F_DMA_SEC_OP_IMMD_DATA_METHOD (0x00000004)
#define NVC86F_DMA_SEC_OP_ONE_INC (0x00000005)
#define NVC86F_DMA_SEC_OP_RESERVED6 (0x00000006)
#define NVC86F_DMA_SEC_OP_END_PB_SEGMENT (0x00000007)
/* dma incrementing method format */
#define NVC86F_DMA_INCR_ADDRESS 11:0
#define NVC86F_DMA_INCR_SUBCHANNEL 15:13
#define NVC86F_DMA_INCR_COUNT 28:16
#define NVC86F_DMA_INCR_OPCODE 31:29
#define NVC86F_DMA_INCR_OPCODE_VALUE (0x00000001)
#define NVC86F_DMA_INCR_DATA 31:0
/* dma non-incrementing method format */
#define NVC86F_DMA_NONINCR_ADDRESS 11:0
#define NVC86F_DMA_NONINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_NONINCR_COUNT 28:16
#define NVC86F_DMA_NONINCR_OPCODE 31:29
#define NVC86F_DMA_NONINCR_OPCODE_VALUE (0x00000003)
#define NVC86F_DMA_NONINCR_DATA 31:0
/* dma increment-once method format */
#define NVC86F_DMA_ONEINCR_ADDRESS 11:0
#define NVC86F_DMA_ONEINCR_SUBCHANNEL 15:13
#define NVC86F_DMA_ONEINCR_COUNT 28:16
#define NVC86F_DMA_ONEINCR_OPCODE 31:29
#define NVC86F_DMA_ONEINCR_OPCODE_VALUE (0x00000005)
#define NVC86F_DMA_ONEINCR_DATA 31:0
/* dma no-operation format */
#define NVC86F_DMA_NOP (0x00000000)
/* dma immediate-data format */
#define NVC86F_DMA_IMMD_ADDRESS 11:0
#define NVC86F_DMA_IMMD_SUBCHANNEL 15:13
#define NVC86F_DMA_IMMD_DATA 28:16
#define NVC86F_DMA_IMMD_OPCODE 31:29
#define NVC86F_DMA_IMMD_OPCODE_VALUE (0x00000004)
/* dma set sub-device mask format */
#define NVC86F_DMA_SET_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_SET_SUBDEVICE_MASK_OPCODE_VALUE (0x00000001)
/* dma store sub-device mask format */
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_VALUE 15:4
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_STORE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000002)
/* dma use sub-device mask format */
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE 31:16
#define NVC86F_DMA_USE_SUBDEVICE_MASK_OPCODE_VALUE (0x00000003)
/* dma end-segment format */
#define NVC86F_DMA_ENDSEG_OPCODE 31:29
#define NVC86F_DMA_ENDSEG_OPCODE_VALUE (0x00000007)
/* dma legacy incrementing/non-incrementing formats */
#define NVC86F_DMA_ADDRESS 12:2
#define NVC86F_DMA_SUBCH 15:13
#define NVC86F_DMA_OPCODE3 17:16
#define NVC86F_DMA_OPCODE3_NONE (0x00000000)
#define NVC86F_DMA_COUNT 28:18
#define NVC86F_DMA_OPCODE 31:29
#define NVC86F_DMA_OPCODE_METHOD (0x00000000)
#define NVC86F_DMA_OPCODE_NONINC_METHOD (0x00000002)
#define NVC86F_DMA_DATA 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* _clc86f_h_ */
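To make the MEM_OP_A..D grouping in this class concrete, the following is a hedged sketch of emitting a system-scope memory barrier: the four methods are written as one incrementing run starting at NVC86F_MEM_OP_A (MEM_OP_D must come last, as noted above), with MEM_OP_C selecting MEMBAR_TYPE_SYS_MEMBAR and MEM_OP_D selecting OPERATION_MEMBAR. The push() callback is assumed to append one 32-bit word to the caller's pushbuffer, and the method-address field of the header carries the method offset in dwords.

#include "nvtypes.h"

/* Illustrative sketch: encode a system membar as MEM_OP_A..D on subchannel 0. */
static void emit_sys_membar(void (*push)(NvU32 word))
{
    /* Incrementing method header: SEC_OP_INC_METHOD (bits 31:29 = 1),
     * count 4 (bits 28:16), subchannel 0 (bits 15:13),
     * address = NVC86F_MEM_OP_A / 4 (bits 11:0). */
    push((1u << 29) | (4u << 16) | (0x28u >> 2));

    push(0);            /* MEM_OP_A: no targeted-invalidate parameters        */
    push(0);            /* MEM_OP_B: no target address                        */
    push(0);            /* MEM_OP_C: MEMBAR_TYPE_SYS_MEMBAR (value 0)         */
    push(5u << 27);     /* MEM_OP_D: OPERATION_MEMBAR (value 5, bits 31:27)   */
}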

View File

@ -0,0 +1,430 @@
/*******************************************************************************
Copyright (c) 1993-2004 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "nvtypes.h"
#ifndef _clc8b5_h_
#define _clc8b5_h_
#ifdef __cplusplus
extern "C" {
#endif
#define HOPPER_DMA_COPY_A (0x0000C8B5)
typedef volatile struct _clc8b5_tag0 {
NvV32 Reserved00[0x40];
NvV32 Nop; // 0x00000100 - 0x00000103
NvV32 Reserved01[0xF];
NvV32 PmTrigger; // 0x00000140 - 0x00000143
NvV32 Reserved02[0x36];
NvV32 SetMonitoredFenceType; // 0x0000021C - 0x0000021F
NvV32 SetMonitoredFenceSignalAddrBaseUpper; // 0x00000220 - 0x00000223
NvV32 SetMonitoredFenceSignalAddrBaseLower; // 0x00000224 - 0x00000227
NvV32 Reserved03[0x6];
NvV32 SetSemaphoreA; // 0x00000240 - 0x00000243
NvV32 SetSemaphoreB; // 0x00000244 - 0x00000247
NvV32 SetSemaphorePayload; // 0x00000248 - 0x0000024B
NvV32 SetSemaphorePayloadUpper; // 0x0000024C - 0x0000024F
NvV32 Reserved04[0x1];
NvV32 SetRenderEnableA; // 0x00000254 - 0x00000257
NvV32 SetRenderEnableB; // 0x00000258 - 0x0000025B
NvV32 SetRenderEnableC; // 0x0000025C - 0x0000025F
NvV32 SetSrcPhysMode; // 0x00000260 - 0x00000263
NvV32 SetDstPhysMode; // 0x00000264 - 0x00000267
NvV32 Reserved05[0x26];
NvV32 LaunchDma; // 0x00000300 - 0x00000303
NvV32 Reserved06[0x3F];
NvV32 OffsetInUpper; // 0x00000400 - 0x00000403
NvV32 OffsetInLower; // 0x00000404 - 0x00000407
NvV32 OffsetOutUpper; // 0x00000408 - 0x0000040B
NvV32 OffsetOutLower; // 0x0000040C - 0x0000040F
NvV32 PitchIn; // 0x00000410 - 0x00000413
NvV32 PitchOut; // 0x00000414 - 0x00000417
NvV32 LineLengthIn; // 0x00000418 - 0x0000041B
NvV32 LineCount; // 0x0000041C - 0x0000041F
NvV32 Reserved07[0x38];
NvV32 SetSecureCopyMode; // 0x00000500 - 0x00000503
NvV32 SetDecryptIv0; // 0x00000504 - 0x00000507
NvV32 SetDecryptIv1; // 0x00000508 - 0x0000050B
NvV32 SetDecryptIv2; // 0x0000050C - 0x0000050F
NvV32 Reserved_SetAESCounter; // 0x00000510 - 0x00000513
NvV32 SetDecryptAuthTagCompareAddrUpper; // 0x00000514 - 0x00000517
NvV32 SetDecryptAuthTagCompareAddrLower; // 0x00000518 - 0x0000051B
NvV32 Reserved08[0x5];
NvV32 SetEncryptAuthTagAddrUpper; // 0x00000530 - 0x00000533
NvV32 SetEncryptAuthTagAddrLower; // 0x00000534 - 0x00000537
NvV32 SetEncryptIvAddrUpper; // 0x00000538 - 0x0000053B
NvV32 SetEncryptIvAddrLower; // 0x0000053C - 0x0000053F
NvV32 Reserved09[0x6F];
NvV32 SetMemoryScrubParameters; // 0x000006FC - 0x000006FF
NvV32 SetRemapConstA; // 0x00000700 - 0x00000703
NvV32 SetRemapConstB; // 0x00000704 - 0x00000707
NvV32 SetRemapComponents; // 0x00000708 - 0x0000070B
NvV32 SetDstBlockSize; // 0x0000070C - 0x0000070F
NvV32 SetDstWidth; // 0x00000710 - 0x00000713
NvV32 SetDstHeight; // 0x00000714 - 0x00000717
NvV32 SetDstDepth; // 0x00000718 - 0x0000071B
NvV32 SetDstLayer; // 0x0000071C - 0x0000071F
NvV32 SetDstOrigin; // 0x00000720 - 0x00000723
NvV32 Reserved10[0x1];
NvV32 SetSrcBlockSize; // 0x00000728 - 0x0000072B
NvV32 SetSrcWidth; // 0x0000072C - 0x0000072F
NvV32 SetSrcHeight; // 0x00000730 - 0x00000733
NvV32 SetSrcDepth; // 0x00000734 - 0x00000737
NvV32 SetSrcLayer; // 0x00000738 - 0x0000073B
NvV32 SetSrcOrigin; // 0x0000073C - 0x0000073F
NvV32 Reserved11[0x1];
NvV32 SrcOriginX; // 0x00000744 - 0x00000747
NvV32 SrcOriginY; // 0x00000748 - 0x0000074B
NvV32 DstOriginX; // 0x0000074C - 0x0000074F
NvV32 DstOriginY; // 0x00000750 - 0x00000753
NvV32 Reserved12[0x270];
NvV32 PmTriggerEnd; // 0x00001114 - 0x00001117
NvV32 Reserved13[0x3BA];
} hopper_dma_copy_aControlPio;
#define NVC8B5_NOP (0x00000100)
#define NVC8B5_NOP_PARAMETER 31:0
#define NVC8B5_PM_TRIGGER (0x00000140)
#define NVC8B5_PM_TRIGGER_V 31:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE (0x0000021C)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE 0:0
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE (0x00000000)
#define NVC8B5_SET_MONITORED_FENCE_TYPE_TYPE_MONITORED_FENCE_EXT (0x00000001)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER (0x00000220)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_UPPER_UPPER 24:0
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER (0x00000224)
#define NVC8B5_SET_MONITORED_FENCE_SIGNAL_ADDR_BASE_LOWER_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_A (0x00000240)
#define NVC8B5_SET_SEMAPHORE_A_UPPER 24:0
#define NVC8B5_SET_SEMAPHORE_B (0x00000244)
#define NVC8B5_SET_SEMAPHORE_B_LOWER 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD (0x00000248)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_PAYLOAD 31:0
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER (0x0000024C)
#define NVC8B5_SET_SEMAPHORE_PAYLOAD_UPPER_PAYLOAD 31:0
#define NVC8B5_SET_RENDER_ENABLE_A (0x00000254)
#define NVC8B5_SET_RENDER_ENABLE_A_UPPER 24:0
#define NVC8B5_SET_RENDER_ENABLE_B (0x00000258)
#define NVC8B5_SET_RENDER_ENABLE_B_LOWER 31:0
#define NVC8B5_SET_RENDER_ENABLE_C (0x0000025C)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE 2:0
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_FALSE (0x00000000)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_TRUE (0x00000001)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_CONDITIONAL (0x00000002)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_EQUAL (0x00000003)
#define NVC8B5_SET_RENDER_ENABLE_C_MODE_RENDER_IF_NOT_EQUAL (0x00000004)
#define NVC8B5_SET_SRC_PHYS_MODE (0x00000260)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SRC_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SRC_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_SRC_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_SRC_PHYS_MODE_FLA 9:9
#define NVC8B5_SET_DST_PHYS_MODE (0x00000264)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET 1:0
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_DST_PHYS_MODE_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_DST_PHYS_MODE_BASIC_KIND 5:2
#define NVC8B5_SET_DST_PHYS_MODE_PEER_ID 8:6
#define NVC8B5_SET_DST_PHYS_MODE_FLA 9:9
#define NVC8B5_LAUNCH_DMA (0x00000300)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE 1:0
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_PIPELINED (0x00000001)
#define NVC8B5_LAUNCH_DMA_DATA_TRANSFER_TYPE_NON_PIPELINED (0x00000002)
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE 2:2
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE 25:25
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_SYS (0x00000000)
#define NVC8B5_LAUNCH_DMA_FLUSH_TYPE_GL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE 4:3
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_NO_TIMESTAMP (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_SEMAPHORE_WITH_TIMESTAMP (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_ONE_WORD_SEMAPHORE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_FOUR_WORD_SEMAPHORE (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_TYPE_RELEASE_CONDITIONAL_INTR_SEMAPHORE (0x00000003)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE 6:5
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_BLOCKING (0x00000001)
#define NVC8B5_LAUNCH_DMA_INTERRUPT_TYPE_NON_BLOCKING (0x00000002)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT 7:7
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT 8:8
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH (0x00000001)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE 9:9
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MULTI_LINE_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE 10:10
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_REMAP_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE 11:11
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_FORCE_RMWDISABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE 12:12
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_SRC_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_DST_TYPE 13:13
#define NVC8B5_LAUNCH_DMA_DST_TYPE_VIRTUAL (0x00000000)
#define NVC8B5_LAUNCH_DMA_DST_TYPE_PHYSICAL (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION 17:14
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMIN (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IMAX (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IXOR (0x00000002)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IAND (0x00000003)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IOR (0x00000004)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_IADD (0x00000005)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INC (0x00000006)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_DEC (0x00000007)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDA (0x00000008)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDB (0x00000009)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FADD (0x0000000A)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMIN (0x0000000B)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_FMAX (0x0000000C)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDC (0x0000000D)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDD (0x0000000E)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_INVALIDE (0x0000000F)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN 18:18
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_SIGNED (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_SIGN_UNSIGNED (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE 19:19
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_REDUCTION_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE 21:20
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_PROT2PROT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_DEFAULT (0x00000000)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_SECURE (0x00000001)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_NONPROT2NONPROT (0x00000002)
#define NVC8B5_LAUNCH_DMA_COPY_TYPE_RESERVED (0x00000003)
#define NVC8B5_LAUNCH_DMA_VPRMODE 22:22
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_NONE (0x00000000)
#define NVC8B5_LAUNCH_DMA_VPRMODE_VPR_VID2VID (0x00000001)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE 23:23
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_MEMORY_SCRUB_ENABLE_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_START_OF_COPY 24:24
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC 26:26
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_FALSE (0x00000000)
#define NVC8B5_LAUNCH_DMA_DISABLE_PLC_TRUE (0x00000001)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE 27:27
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_ONE_WORD (0x00000000)
#define NVC8B5_LAUNCH_DMA_SEMAPHORE_PAYLOAD_SIZE_TWO_WORD (0x00000001)
#define NVC8B5_LAUNCH_DMA_RESERVED_ERR_CODE 31:28
#define NVC8B5_OFFSET_IN_UPPER (0x00000400)
#define NVC8B5_OFFSET_IN_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_IN_LOWER (0x00000404)
#define NVC8B5_OFFSET_IN_LOWER_VALUE 31:0
#define NVC8B5_OFFSET_OUT_UPPER (0x00000408)
#define NVC8B5_OFFSET_OUT_UPPER_UPPER 24:0
#define NVC8B5_OFFSET_OUT_LOWER (0x0000040C)
#define NVC8B5_OFFSET_OUT_LOWER_VALUE 31:0
#define NVC8B5_PITCH_IN (0x00000410)
#define NVC8B5_PITCH_IN_VALUE 31:0
#define NVC8B5_PITCH_OUT (0x00000414)
#define NVC8B5_PITCH_OUT_VALUE 31:0
#define NVC8B5_LINE_LENGTH_IN (0x00000418)
#define NVC8B5_LINE_LENGTH_IN_VALUE 31:0
#define NVC8B5_LINE_COUNT (0x0000041C)
#define NVC8B5_LINE_COUNT_VALUE 31:0
#define NVC8B5_SET_SECURE_COPY_MODE (0x00000500)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE 0:0
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_ENCRYPT (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_MODE_DECRYPT (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET 20:19
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_PEER_ID 23:21
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_SRC_FLA 24:24
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET 26:25
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_LOCAL_FB (0x00000000)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_COHERENT_SYSMEM (0x00000001)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_NONCOHERENT_SYSMEM (0x00000002)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_TARGET_PEERMEM (0x00000003)
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_PEER_ID 29:27
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_DST_FLA 30:30
#define NVC8B5_SET_SECURE_COPY_MODE_RESERVED_END_OF_COPY 31:31
#define NVC8B5_SET_DECRYPT_IV0 (0x00000504)
#define NVC8B5_SET_DECRYPT_IV0_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV1 (0x00000508)
#define NVC8B5_SET_DECRYPT_IV1_VALUE 31:0
#define NVC8B5_SET_DECRYPT_IV2 (0x0000050C)
#define NVC8B5_SET_DECRYPT_IV2_VALUE 31:0
#define NVC8B5_RESERVED_SET_AESCOUNTER (0x00000510)
#define NVC8B5_RESERVED_SET_AESCOUNTER_VALUE 31:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER (0x00000514)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER (0x00000518)
#define NVC8B5_SET_DECRYPT_AUTH_TAG_COMPARE_ADDR_LOWER_LOWER 31:0
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER (0x00000530)
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER (0x00000534)
#define NVC8B5_SET_ENCRYPT_AUTH_TAG_ADDR_LOWER_LOWER 31:0
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER (0x00000538)
#define NVC8B5_SET_ENCRYPT_IV_ADDR_UPPER_UPPER 24:0
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER (0x0000053C)
#define NVC8B5_SET_ENCRYPT_IV_ADDR_LOWER_LOWER 31:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS (0x000006FC)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE 0:0
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_FALSE (0x00000000)
#define NVC8B5_SET_MEMORY_SCRUB_PARAMETERS_DISCARDABLE_TRUE (0x00000001)
#define NVC8B5_SET_REMAP_CONST_A (0x00000700)
#define NVC8B5_SET_REMAP_CONST_A_V 31:0
#define NVC8B5_SET_REMAP_CONST_B (0x00000704)
#define NVC8B5_SET_REMAP_CONST_B_V 31:0
#define NVC8B5_SET_REMAP_COMPONENTS (0x00000708)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X 2:0
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_X_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y 6:4
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Y_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z 10:8
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_Z_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W 14:12
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_X (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Y (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_Z (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_SRC_W (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_A (0x00000004)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_CONST_B (0x00000005)
#define NVC8B5_SET_REMAP_COMPONENTS_DST_W_NO_WRITE (0x00000006)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE 17:16
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_COMPONENT_SIZE_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS 21:20
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_SRC_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS 25:24
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_ONE (0x00000000)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_TWO (0x00000001)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_THREE (0x00000002)
#define NVC8B5_SET_REMAP_COMPONENTS_NUM_DST_COMPONENTS_FOUR (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE (0x0000070C)
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_DST_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_DST_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_DST_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_DST_WIDTH (0x00000710)
#define NVC8B5_SET_DST_WIDTH_V 31:0
#define NVC8B5_SET_DST_HEIGHT (0x00000714)
#define NVC8B5_SET_DST_HEIGHT_V 31:0
#define NVC8B5_SET_DST_DEPTH (0x00000718)
#define NVC8B5_SET_DST_DEPTH_V 31:0
#define NVC8B5_SET_DST_LAYER (0x0000071C)
#define NVC8B5_SET_DST_LAYER_V 31:0
#define NVC8B5_SET_DST_ORIGIN (0x00000720)
#define NVC8B5_SET_DST_ORIGIN_X 15:0
#define NVC8B5_SET_DST_ORIGIN_Y 31:16
#define NVC8B5_SET_SRC_BLOCK_SIZE (0x00000728)
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH 3:0
#define NVC8B5_SET_SRC_BLOCK_SIZE_WIDTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT 7:4
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_HEIGHT_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH 11:8
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_ONE_GOB (0x00000000)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_TWO_GOBS (0x00000001)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_FOUR_GOBS (0x00000002)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_EIGHT_GOBS (0x00000003)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_SIXTEEN_GOBS (0x00000004)
#define NVC8B5_SET_SRC_BLOCK_SIZE_DEPTH_THIRTYTWO_GOBS (0x00000005)
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT 15:12
#define NVC8B5_SET_SRC_BLOCK_SIZE_GOB_HEIGHT_GOB_HEIGHT_FERMI_8 (0x00000001)
#define NVC8B5_SET_SRC_WIDTH (0x0000072C)
#define NVC8B5_SET_SRC_WIDTH_V 31:0
#define NVC8B5_SET_SRC_HEIGHT (0x00000730)
#define NVC8B5_SET_SRC_HEIGHT_V 31:0
#define NVC8B5_SET_SRC_DEPTH (0x00000734)
#define NVC8B5_SET_SRC_DEPTH_V 31:0
#define NVC8B5_SET_SRC_LAYER (0x00000738)
#define NVC8B5_SET_SRC_LAYER_V 31:0
#define NVC8B5_SET_SRC_ORIGIN (0x0000073C)
#define NVC8B5_SET_SRC_ORIGIN_X 15:0
#define NVC8B5_SET_SRC_ORIGIN_Y 31:16
#define NVC8B5_SRC_ORIGIN_X (0x00000744)
#define NVC8B5_SRC_ORIGIN_X_VALUE 31:0
#define NVC8B5_SRC_ORIGIN_Y (0x00000748)
#define NVC8B5_SRC_ORIGIN_Y_VALUE 31:0
#define NVC8B5_DST_ORIGIN_X (0x0000074C)
#define NVC8B5_DST_ORIGIN_X_VALUE 31:0
#define NVC8B5_DST_ORIGIN_Y (0x00000750)
#define NVC8B5_DST_ORIGIN_Y_VALUE 31:0
#define NVC8B5_PM_TRIGGER_END (0x00001114)
#define NVC8B5_PM_TRIGGER_END_V 31:0
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif // _clc8b5_h
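For orientation, here is a minimal sketch of a flat, non-pipelined virtual-to-virtual copy with this class, using only the method offsets and LAUNCH_DMA fields defined above. push_method() is assumed to emit a (method, value) pair into a channel already bound to HOPPER_DMA_COPY_A; error handling, semaphore release and physical-mode settings are left out.

#include "nvtypes.h"

/* Illustrative sketch: copy size_bytes from src_va to dst_va as a single line. */
static void copy_linear(void (*push_method)(NvU32 method, NvU32 value),
                        NvU64 src_va, NvU64 dst_va, NvU32 size_bytes)
{
    push_method(NVC8B5_OFFSET_IN_UPPER,  (NvU32)(src_va >> 32));
    push_method(NVC8B5_OFFSET_IN_LOWER,  (NvU32)src_va);
    push_method(NVC8B5_OFFSET_OUT_UPPER, (NvU32)(dst_va >> 32));
    push_method(NVC8B5_OFFSET_OUT_LOWER, (NvU32)dst_va);
    push_method(NVC8B5_LINE_LENGTH_IN,   size_bytes);   /* bytes in the line  */
    push_method(NVC8B5_LINE_COUNT,       1);

    /* LAUNCH_DMA: DATA_TRANSFER_TYPE_NON_PIPELINED (bits 1:0 = 2) and
     * FLUSH_ENABLE_TRUE (bit 2 = 1); all other fields left at 0, which
     * selects pitch layout, virtual src/dst and no semaphore. */
    push_method(NVC8B5_LAUNCH_DMA, (2u << 0) | (1u << 2));
}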

View File

@ -32,15 +32,8 @@
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100   (0x00000140)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100   (0x00000160)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100   (0x00000170)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100   (0x00000180)
#define NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100   (0x00000190)

/* valid ARCHITECTURE_GP10x implementation values */
#define NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_GP100 (0x00000000)

View File

@ -0,0 +1,508 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gh100_dev_fault_h__
#define __gh100_dev_fault_h__
/* This file is autogenerated. Do not edit */
#define NV_PFAULT /* ----G */
#define NV_PFAULT_MMU_ENG_ID_GRAPHICS 384 /* */
#define NV_PFAULT_MMU_ENG_ID_DISPLAY 1 /* */
#define NV_PFAULT_MMU_ENG_ID_GSP 2 /* */
#define NV_PFAULT_MMU_ENG_ID_IFB 55 /* */
#define NV_PFAULT_MMU_ENG_ID_FLA 4 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1 256 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2 320 /* */
#define NV_PFAULT_MMU_ENG_ID_SEC 6 /* */
#define NV_PFAULT_MMU_ENG_ID_FSP 7 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF 10 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF0 10 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF1 11 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF2 12 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF3 13 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF4 14 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF5 15 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF6 16 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF7 17 /* */
#define NV_PFAULT_MMU_ENG_ID_PERF8 18 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC 19 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC0 19 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC1 20 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC2 21 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC3 22 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC4 23 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC5 24 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC6 25 /* */
#define NV_PFAULT_MMU_ENG_ID_NVDEC7 26 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG0 27 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG1 28 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG2 29 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG3 30 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG4 31 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG5 32 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG6 33 /* */
#define NV_PFAULT_MMU_ENG_ID_NVJPG7 34 /* */
#define NV_PFAULT_MMU_ENG_ID_GRCOPY 43 /* */
#define NV_PFAULT_MMU_ENG_ID_CE0 43 /* */
#define NV_PFAULT_MMU_ENG_ID_CE1 44 /* */
#define NV_PFAULT_MMU_ENG_ID_CE2 45 /* */
#define NV_PFAULT_MMU_ENG_ID_CE3 46 /* */
#define NV_PFAULT_MMU_ENG_ID_CE4 47 /* */
#define NV_PFAULT_MMU_ENG_ID_CE5 48 /* */
#define NV_PFAULT_MMU_ENG_ID_CE6 49 /* */
#define NV_PFAULT_MMU_ENG_ID_CE7 50 /* */
#define NV_PFAULT_MMU_ENG_ID_CE8 51 /* */
#define NV_PFAULT_MMU_ENG_ID_CE9 52 /* */
#define NV_PFAULT_MMU_ENG_ID_PWR_PMU 5 /* */
#define NV_PFAULT_MMU_ENG_ID_PTP 3 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC0 35 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC1 36 /* */
#define NV_PFAULT_MMU_ENG_ID_NVENC2 37 /* */
#define NV_PFAULT_MMU_ENG_ID_OFA0 53 /* */
#define NV_PFAULT_MMU_ENG_ID_PHYSICAL 56 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST0 64 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST1 65 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST2 66 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST3 67 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST4 68 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST5 69 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST6 70 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST7 71 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST8 72 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST9 73 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST10 74 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST11 75 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST12 76 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST13 77 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST14 78 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST15 79 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST16 80 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST17 81 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST18 82 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST19 83 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST20 84 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST21 85 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST22 86 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST23 87 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST24 88 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST25 89 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST26 90 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST27 91 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST28 92 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST29 93 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST30 94 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST31 95 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST32 96 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST33 97 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST34 98 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST35 99 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST36 100 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST37 101 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST38 102 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST39 103 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST40 104 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST41 105 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST42 106 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST43 107 /* */
#define NV_PFAULT_MMU_ENG_ID_HOST44 108 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN0 256 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN1 257 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN2 258 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN3 259 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN4 260 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN5 261 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN6 262 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN7 263 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN8 264 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN9 265 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN10 266 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN11 267 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN12 268 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN13 269 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN14 270 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN15 271 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN16 272 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN17 273 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN18 274 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN19 275 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN20 276 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN21 277 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN22 278 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN23 279 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN24 280 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN25 281 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN26 282 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN27 283 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN28 284 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN29 285 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN30 286 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN31 287 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN32 288 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN33 289 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN34 290 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN35 291 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN36 292 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN37 293 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN38 294 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN39 295 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN40 296 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN41 297 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN42 298 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN43 299 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN44 300 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN45 301 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN46 302 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN47 303 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN48 304 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN49 305 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN50 306 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN51 307 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN52 308 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN53 309 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN54 310 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN55 311 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN56 312 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN57 313 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN58 314 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN59 315 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN60 316 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN61 317 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN62 318 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR1_FN63 319 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN0 320 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN1 321 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN2 322 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN3 323 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN4 324 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN5 325 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN6 326 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN7 327 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN8 328 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN9 329 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN10 330 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN11 331 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN12 332 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN13 333 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN14 334 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN15 335 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN16 336 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN17 337 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN18 338 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN19 339 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN20 340 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN21 341 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN22 342 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN23 343 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN24 344 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN25 345 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN26 346 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN27 347 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN28 348 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN29 349 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN30 350 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN31 351 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN32 352 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN33 353 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN34 354 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN35 355 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN36 356 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN37 357 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN38 358 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN39 359 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN40 360 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN41 361 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN42 362 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN43 363 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN44 364 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN45 365 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN46 366 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN47 367 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN48 368 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN49 369 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN50 370 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN51 371 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN52 372 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN53 373 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN54 374 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN55 375 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN56 376 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN57 377 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN58 378 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN59 379 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN60 380 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN61 381 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN62 382 /* */
#define NV_PFAULT_MMU_ENG_ID_BAR2_FN63 383 /* */
#define NV_PFAULT_FAULT_TYPE 4:0 /* */
#define NV_PFAULT_FAULT_TYPE_PDE 0x00000000 /* */
#define NV_PFAULT_FAULT_TYPE_PDE_SIZE 0x00000001 /* */
#define NV_PFAULT_FAULT_TYPE_PTE 0x00000002 /* */
#define NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION 0x00000003 /* */
#define NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK 0x00000004 /* */
#define NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION 0x00000005 /* */
#define NV_PFAULT_FAULT_TYPE_RO_VIOLATION 0x00000006 /* */
#define NV_PFAULT_FAULT_TYPE_WO_VIOLATION 0x00000007 /* */
#define NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION 0x00000008 /* */
#define NV_PFAULT_FAULT_TYPE_WORK_CREATION 0x00000009 /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE 0x0000000a /* */
#define NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE 0x0000000b /* */
#define NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND 0x0000000c /* */
#define NV_PFAULT_FAULT_TYPE_REGION_VIOLATION 0x0000000d /* */
#define NV_PFAULT_FAULT_TYPE_POISONED 0x0000000e /* */
#define NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION 0x0000000f /* */
#define NV_PFAULT_CLIENT 14:8 /* */
#define NV_PFAULT_CLIENT_GPC_T1_0 0x00000000 /* */
#define NV_PFAULT_CLIENT_GPC_T1_1 0x00000001 /* */
#define NV_PFAULT_CLIENT_GPC_T1_2 0x00000002 /* */
#define NV_PFAULT_CLIENT_GPC_T1_3 0x00000003 /* */
#define NV_PFAULT_CLIENT_GPC_T1_4 0x00000004 /* */
#define NV_PFAULT_CLIENT_GPC_T1_5 0x00000005 /* */
#define NV_PFAULT_CLIENT_GPC_T1_6 0x00000006 /* */
#define NV_PFAULT_CLIENT_GPC_T1_7 0x00000007 /* */
#define NV_PFAULT_CLIENT_GPC_PE_0 0x00000008 /* */
#define NV_PFAULT_CLIENT_GPC_PE_1 0x00000009 /* */
#define NV_PFAULT_CLIENT_GPC_PE_2 0x0000000A /* */
#define NV_PFAULT_CLIENT_GPC_PE_3 0x0000000B /* */
#define NV_PFAULT_CLIENT_GPC_PE_4 0x0000000C /* */
#define NV_PFAULT_CLIENT_GPC_PE_5 0x0000000D /* */
#define NV_PFAULT_CLIENT_GPC_PE_6 0x0000000E /* */
#define NV_PFAULT_CLIENT_GPC_PE_7 0x0000000F /* */
#define NV_PFAULT_CLIENT_GPC_RAST 0x00000010 /* */
#define NV_PFAULT_CLIENT_GPC_GCC 0x00000011 /* */
#define NV_PFAULT_CLIENT_GPC_GPCCS 0x00000012 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_0 0x00000013 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_1 0x00000014 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_2 0x00000015 /* */
#define NV_PFAULT_CLIENT_GPC_PROP_3 0x00000016 /* */
#define NV_PFAULT_CLIENT_GPC_T1_8 0x00000021 /* */
#define NV_PFAULT_CLIENT_GPC_T1_9 0x00000022 /* */
#define NV_PFAULT_CLIENT_GPC_T1_10 0x00000023 /* */
#define NV_PFAULT_CLIENT_GPC_T1_11 0x00000024 /* */
#define NV_PFAULT_CLIENT_GPC_T1_12 0x00000025 /* */
#define NV_PFAULT_CLIENT_GPC_T1_13 0x00000026 /* */
#define NV_PFAULT_CLIENT_GPC_T1_14 0x00000027 /* */
#define NV_PFAULT_CLIENT_GPC_T1_15 0x00000028 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_0 0x00000029 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_1 0x0000002A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_2 0x0000002B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_3 0x0000002C /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_4 0x0000002D /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_5 0x0000002E /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_6 0x0000002F /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_7 0x00000030 /* */
#define NV_PFAULT_CLIENT_GPC_PE_8 0x00000031 /* */
#define NV_PFAULT_CLIENT_GPC_PE_9 0x00000032 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_8 0x00000033 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_9 0x00000034 /* */
#define NV_PFAULT_CLIENT_GPC_T1_16 0x00000035 /* */
#define NV_PFAULT_CLIENT_GPC_T1_17 0x00000036 /* */
#define NV_PFAULT_CLIENT_GPC_T1_18 0x00000037 /* */
#define NV_PFAULT_CLIENT_GPC_T1_19 0x00000038 /* */
#define NV_PFAULT_CLIENT_GPC_PE_10 0x00000039 /* */
#define NV_PFAULT_CLIENT_GPC_PE_11 0x0000003A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_10 0x0000003B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_11 0x0000003C /* */
#define NV_PFAULT_CLIENT_GPC_T1_20 0x0000003D /* */
#define NV_PFAULT_CLIENT_GPC_T1_21 0x0000003E /* */
#define NV_PFAULT_CLIENT_GPC_T1_22 0x0000003F /* */
#define NV_PFAULT_CLIENT_GPC_T1_23 0x00000040 /* */
#define NV_PFAULT_CLIENT_GPC_PE_12 0x00000041 /* */
#define NV_PFAULT_CLIENT_GPC_PE_13 0x00000042 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_12 0x00000043 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_13 0x00000044 /* */
#define NV_PFAULT_CLIENT_GPC_T1_24 0x00000045 /* */
#define NV_PFAULT_CLIENT_GPC_T1_25 0x00000046 /* */
#define NV_PFAULT_CLIENT_GPC_T1_26 0x00000047 /* */
#define NV_PFAULT_CLIENT_GPC_T1_27 0x00000048 /* */
#define NV_PFAULT_CLIENT_GPC_PE_14 0x00000049 /* */
#define NV_PFAULT_CLIENT_GPC_PE_15 0x0000004A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_14 0x0000004B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_15 0x0000004C /* */
#define NV_PFAULT_CLIENT_GPC_T1_28 0x0000004D /* */
#define NV_PFAULT_CLIENT_GPC_T1_29 0x0000004E /* */
#define NV_PFAULT_CLIENT_GPC_T1_30 0x0000004F /* */
#define NV_PFAULT_CLIENT_GPC_T1_31 0x00000050 /* */
#define NV_PFAULT_CLIENT_GPC_PE_16 0x00000051 /* */
#define NV_PFAULT_CLIENT_GPC_PE_17 0x00000052 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_16 0x00000053 /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_17 0x00000054 /* */
#define NV_PFAULT_CLIENT_GPC_T1_32 0x00000055 /* */
#define NV_PFAULT_CLIENT_GPC_T1_33 0x00000056 /* */
#define NV_PFAULT_CLIENT_GPC_T1_34 0x00000057 /* */
#define NV_PFAULT_CLIENT_GPC_T1_35 0x00000058 /* */
#define NV_PFAULT_CLIENT_GPC_PE_18 0x00000059 /* */
#define NV_PFAULT_CLIENT_GPC_PE_19 0x0000005A /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_18 0x0000005B /* */
#define NV_PFAULT_CLIENT_GPC_TPCCS_19 0x0000005C /* */
#define NV_PFAULT_CLIENT_GPC_T1_36 0x0000005D /* */
#define NV_PFAULT_CLIENT_GPC_T1_37 0x0000005E /* */
#define NV_PFAULT_CLIENT_GPC_T1_38 0x0000005F /* */
#define NV_PFAULT_CLIENT_GPC_T1_39 0x00000060 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_0 0x00000070 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_1 0x00000071 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_2 0x00000072 /* */
#define NV_PFAULT_CLIENT_GPC_ROP_3 0x00000073 /* */
#define NV_PFAULT_CLIENT_GPC_GPM 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_VIP 0x00000000 /* */
#define NV_PFAULT_CLIENT_HUB_CE0 0x00000001 /* */
#define NV_PFAULT_CLIENT_HUB_CE1 0x00000002 /* */
#define NV_PFAULT_CLIENT_HUB_DNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_DISPNISO 0x00000003 /* */
#define NV_PFAULT_CLIENT_HUB_FE0 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FE 0x00000004 /* */
#define NV_PFAULT_CLIENT_HUB_FECS0 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_FECS 0x00000005 /* */
#define NV_PFAULT_CLIENT_HUB_HOST 0x00000006 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU 0x00000007 /* */
#define NV_PFAULT_CLIENT_HUB_HOST_CPU_NB 0x00000008 /* */
#define NV_PFAULT_CLIENT_HUB_ISO 0x00000009 /* */
#define NV_PFAULT_CLIENT_HUB_MMU 0x0000000A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC0 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC 0x0000000B /* */
#define NV_PFAULT_CLIENT_HUB_CE3 0x0000000C /* */
#define NV_PFAULT_CLIENT_HUB_NVENC1 0x0000000D /* */
#define NV_PFAULT_CLIENT_HUB_NISO 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_ACTRS 0x0000000E /* */
#define NV_PFAULT_CLIENT_HUB_P2P 0x0000000F /* */
#define NV_PFAULT_CLIENT_HUB_PD 0x00000010 /* */
#define NV_PFAULT_CLIENT_HUB_PERF0 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PERF 0x00000011 /* */
#define NV_PFAULT_CLIENT_HUB_PMU 0x00000012 /* */
#define NV_PFAULT_CLIENT_HUB_RASTERTWOD 0x00000013 /* */
#define NV_PFAULT_CLIENT_HUB_SCC 0x00000014 /* */
#define NV_PFAULT_CLIENT_HUB_SCC_NB 0x00000015 /* */
#define NV_PFAULT_CLIENT_HUB_SEC 0x00000016 /* */
#define NV_PFAULT_CLIENT_HUB_SSYNC 0x00000017 /* */
#define NV_PFAULT_CLIENT_HUB_GRCOPY 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_CE2 0x00000018 /* */
#define NV_PFAULT_CLIENT_HUB_XV 0x00000019 /* */
#define NV_PFAULT_CLIENT_HUB_MMU_NB 0x0000001A /* */
#define NV_PFAULT_CLIENT_HUB_NVENC0 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_NVENC 0x0000001B /* */
#define NV_PFAULT_CLIENT_HUB_DFALCON 0x0000001C /* */
#define NV_PFAULT_CLIENT_HUB_SKED0 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_SKED 0x0000001D /* */
#define NV_PFAULT_CLIENT_HUB_AFALCON 0x0000001E /* */
#define NV_PFAULT_CLIENT_HUB_DONT_CARE 0x0000001F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE0 0x00000020 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE1 0x00000021 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE2 0x00000022 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE3 0x00000023 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE4 0x00000024 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE5 0x00000025 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE6 0x00000026 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE7 0x00000027 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE8 0x00000028 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE9 0x00000029 /* */
#define NV_PFAULT_CLIENT_HUB_HSHUB 0x0000002A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X0 0x0000002B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X1 0x0000002C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X2 0x0000002D /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X3 0x0000002E /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X4 0x0000002F /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X5 0x00000030 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X6 0x00000031 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X7 0x00000032 /* */
#define NV_PFAULT_CLIENT_HUB_NVENC2 0x00000033 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER0 0x00000034 /* */
#define NV_PFAULT_CLIENT_HUB_VPR_SCRUBBER1 0x00000035 /* */
#define NV_PFAULT_CLIENT_HUB_DWBIF 0x00000036 /* */
#define NV_PFAULT_CLIENT_HUB_FBFALCON 0x00000037 /* */
#define NV_PFAULT_CLIENT_HUB_CE_SHIM 0x00000038 /* */
#define NV_PFAULT_CLIENT_HUB_GSP 0x00000039 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC1 0x0000003A /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC2 0x0000003B /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG0 0x0000003C /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC3 0x0000003D /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC4 0x0000003E /* */
#define NV_PFAULT_CLIENT_HUB_OFA0 0x0000003F /* */
#define NV_PFAULT_CLIENT_HUB_HSCE10 0x00000040 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE11 0x00000041 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE12 0x00000042 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE13 0x00000043 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE14 0x00000044 /* */
#define NV_PFAULT_CLIENT_HUB_HSCE15 0x00000045 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X8 0x00000046 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X9 0x00000047 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X10 0x00000048 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X11 0x00000049 /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X12 0x0000004A /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X13 0x0000004B /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X14 0x0000004C /* */
#define NV_PFAULT_CLIENT_HUB_PTP_X15 0x0000004D /* */
#define NV_PFAULT_CLIENT_HUB_FE1 0x0000004E /* */
#define NV_PFAULT_CLIENT_HUB_FE2 0x0000004F /* */
#define NV_PFAULT_CLIENT_HUB_FE3 0x00000050 /* */
#define NV_PFAULT_CLIENT_HUB_FE4 0x00000051 /* */
#define NV_PFAULT_CLIENT_HUB_FE5 0x00000052 /* */
#define NV_PFAULT_CLIENT_HUB_FE6 0x00000053 /* */
#define NV_PFAULT_CLIENT_HUB_FE7 0x00000054 /* */
#define NV_PFAULT_CLIENT_HUB_FECS1 0x00000055 /* */
#define NV_PFAULT_CLIENT_HUB_FECS2 0x00000056 /* */
#define NV_PFAULT_CLIENT_HUB_FECS3 0x00000057 /* */
#define NV_PFAULT_CLIENT_HUB_FECS4 0x00000058 /* */
#define NV_PFAULT_CLIENT_HUB_FECS5 0x00000059 /* */
#define NV_PFAULT_CLIENT_HUB_FECS6 0x0000005A /* */
#define NV_PFAULT_CLIENT_HUB_FECS7 0x0000005B /* */
#define NV_PFAULT_CLIENT_HUB_SKED1 0x0000005C /* */
#define NV_PFAULT_CLIENT_HUB_SKED2 0x0000005D /* */
#define NV_PFAULT_CLIENT_HUB_SKED3 0x0000005E /* */
#define NV_PFAULT_CLIENT_HUB_SKED4 0x0000005F /* */
#define NV_PFAULT_CLIENT_HUB_SKED5 0x00000060 /* */
#define NV_PFAULT_CLIENT_HUB_SKED6 0x00000061 /* */
#define NV_PFAULT_CLIENT_HUB_SKED7 0x00000062 /* */
#define NV_PFAULT_CLIENT_HUB_ESC 0x00000063 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC5 0x0000006F /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC6 0x00000070 /* */
#define NV_PFAULT_CLIENT_HUB_NVDEC7 0x00000071 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG1 0x00000072 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG2 0x00000073 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG3 0x00000074 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG4 0x00000075 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG5 0x00000076 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG6 0x00000077 /* */
#define NV_PFAULT_CLIENT_HUB_NVJPG7 0x00000078 /* */
#define NV_PFAULT_CLIENT_HUB_FSP 0x00000079 /* */
#define NV_PFAULT_ACCESS_TYPE 19:16 /* */
#define NV_PFAULT_ACCESS_TYPE_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_READ 0x00000000 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_WRITE 0x00000001 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG 0x00000002 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH 0x00000003 /* */
#define NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK 0x00000004 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_READ 0x00000008 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_WRITE 0x00000009 /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC 0x0000000a /* */
#define NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH 0x0000000b /* */
#define NV_PFAULT_MMU_CLIENT_TYPE 20:20 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_GPC 0x00000000 /* */
#define NV_PFAULT_MMU_CLIENT_TYPE_HUB 0x00000001 /* */
#define NV_PFAULT_GPC_ID 28:24 /* */
#define NV_PFAULT_PROTECTED_MODE 29:29 /* */
#define NV_PFAULT_REPLAYABLE_FAULT_EN 30:30 /* */
#define NV_PFAULT_VALID 31:31 /* */
#endif // __gh100_dev_fault_h__
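For orientation, the NV_PFAULT_* ranges above describe a single 32-bit fault information word: FAULT_TYPE in bits 4:0, CLIENT in 14:8, ACCESS_TYPE in 19:16, MMU_CLIENT_TYPE in bit 20, GPC_ID in 28:24 and VALID in bit 31. A minimal decoding sketch follows; the EXAMPLE_FIELD_VAL macro and the function name are invented for illustration, since the high:low ranges in this header are normally consumed through the driver's DRF-style field macros rather than used directly as C expressions.

#include <stdint.h>
#include <stdio.h>

/* Extract bits [high:low] from a 32-bit value (illustrative helper). */
#define EXAMPLE_FIELD_VAL(val, high, low) \
    (((val) >> (low)) & ((1u << ((high) - (low) + 1u)) - 1u))

static void example_decode_fault_info(uint32_t info)
{
    uint32_t valid       = EXAMPLE_FIELD_VAL(info, 31, 31); /* NV_PFAULT_VALID */
    uint32_t fault_type  = EXAMPLE_FIELD_VAL(info,  4,  0); /* NV_PFAULT_FAULT_TYPE */
    uint32_t client      = EXAMPLE_FIELD_VAL(info, 14,  8); /* NV_PFAULT_CLIENT */
    uint32_t access_type = EXAMPLE_FIELD_VAL(info, 19, 16); /* NV_PFAULT_ACCESS_TYPE */
    uint32_t client_type = EXAMPLE_FIELD_VAL(info, 20, 20); /* NV_PFAULT_MMU_CLIENT_TYPE */
    uint32_t gpc_id      = EXAMPLE_FIELD_VAL(info, 28, 24); /* NV_PFAULT_GPC_ID */

    if (!valid)
        return;

    /* NV_PFAULT_MMU_CLIENT_TYPE_HUB is 1 and _GPC is 0; the CLIENT value is
     * interpreted against the HUB or GPC client table accordingly. */
    printf("fault: type=0x%x client=0x%x (%s) access=0x%x gpc=%u\n",
           fault_type, client, client_type ? "HUB" : "GPC", access_type, gpc_id);
}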

View File

@ -0,0 +1,560 @@
/*******************************************************************************
Copyright (c) 2003-2016 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __gh100_dev_mmu_h__
#define __gh100_dev_mmu_h__
/* This file is autogenerated. Do not edit */
#define NV_MMU_PDE /* ----G */
#define NV_MMU_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PDE__SIZE 8
#define NV_MMU_PTE /* ----G */
#define NV_MMU_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_PTE_LOCK (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_LOCK_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_LOCK_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE (1*32+30):(1*32+30) /* RWXVF */
#define NV_MMU_PTE_READ_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_READ_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE (1*32+31):(1*32+31) /* RWXVF */
#define NV_MMU_PTE_WRITE_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_PTE_WRITE_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_PTE__SIZE 8
#define NV_MMU_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_PTE_KIND (1*32+7):(1*32+4) /* RWXVF */
#define NV_MMU_PTE_KIND_INVALID 0x07 /* R---V */
#define NV_MMU_PTE_KIND_PITCH 0x00 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_PTE_KIND_Z16 0x1 /* R---V */
#define NV_MMU_PTE_KIND_S8 0x2 /* R---V */
#define NV_MMU_PTE_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_PTE_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE 0x8 /* R---V */
#define NV_MMU_PTE_KIND_GENERIC_MEMORY_COMPRESSIBLE_DISABLE_PLC 0x9 /* R---V */
#define NV_MMU_PTE_KIND_S8_COMPRESSIBLE_DISABLE_PLC 0xA /* R---V */
#define NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC 0xB /* R---V */
#define NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC 0xC /* R---V */
#define NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC 0xD /* R---V */
#define NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC 0xE /* R---V */
#define NV_MMU_PTE_KIND_SMSKED_MESSAGE 0xF /* R---V */
#define NV_MMU_VER1_PDE /* ----G */
#define NV_MMU_VER1_PDE_APERTURE_BIG (0*32+1):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE (0*32+3):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_SIZE_FULL 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_HALF 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_QUARTER 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_SIZE_EIGHTH 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL (1*32+1):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL (1*32+2):(1*32+2) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_SYS (1*32+31):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID (1*32+31-3):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER (1*32+31):(1*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PDE__SIZE 8
#define NV_MMU_VER1_PTE /* ----G */
#define NV_MMU_VER1_PTE_VALID (0*32+0):(0*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE (0*32+1):(0*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY (0*32+2):(0*32+2) /* RWXVF */
#define NV_MMU_VER1_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_ENCRYPTED (0*32+3):(0*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER1_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER1_PTE_ADDRESS_SYS (0*32+31):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID (0*32+31-3):(0*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER (0*32+31):(0*32+32-3) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER1_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER1_PTE_VOL (1*32+0):(1*32+0) /* RWXVF */
#define NV_MMU_VER1_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE (1*32+2):(1*32+1) /* RWXVF */
#define NV_MMU_VER1_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER1_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE (1*32+3):(1*32+3) /* RWXVF */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER1_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER1_PTE_COMPTAGLINE (1*32+20+11):(1*32+12) /* RWXVF */
#define NV_MMU_VER1_PTE_KIND (1*32+11):(1*32+4) /* RWXVF */
#define NV_MMU_VER1_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER1_PTE__SIZE 8
#define NV_MMU_VER1_PTE_COMPTAGS_NONE 0x0 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_1 0x1 /* */
#define NV_MMU_VER1_PTE_COMPTAGS_2 0x2 /* */
#define NV_MMU_NEW_PDE /* ----G */
#define NV_MMU_NEW_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PDE__SIZE 8
#define NV_MMU_NEW_DUAL_PDE /* ----G */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_NEW_DUAL_PDE__SIZE 16
#define NV_MMU_NEW_PTE /* ----G */
#define NV_MMU_NEW_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_NEW_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_NEW_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_NEW_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_NEW_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_NEW_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_NEW_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_NEW_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_NEW_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_NEW_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_NEW_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_NEW_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_NEW_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_NEW_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_NEW_PTE__SIZE 8
#define NV_MMU_VER2_PDE /* ----G */
#define NV_MMU_VER2_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PDE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PDE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PDE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PDE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PDE__SIZE 8
#define NV_MMU_VER2_DUAL_PDE /* ----G */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_TRUE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_IS_PDE_FALSE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG 3:3 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_BIG_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS 5:5 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_NO_ATS_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SYS 53:(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID (35-3):(8-4) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL 67:67 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_VOL_SMALL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_SYS 117:72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID (99-3):72 /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER 99:(100-3) /* RWXVF */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SMALL_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER2_DUAL_PDE__SIZE 16
#define NV_MMU_VER2_PTE /* ----G */
#define NV_MMU_VER2_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER2_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_VOL 3:3 /* RWXVF */
#define NV_MMU_VER2_PTE_VOL_TRUE 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_VOL_FALSE 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ENCRYPTED 4:4 /* RWXVF */
#define NV_MMU_VER2_PTE_ENCRYPTED_TRUE 0x00000001 /* R---V */
#define NV_MMU_VER2_PTE_ENCRYPTED_FALSE 0x00000000 /* R---V */
#define NV_MMU_VER2_PTE_PRIVILEGE 5:5 /* RWXVF */
#define NV_MMU_VER2_PTE_PRIVILEGE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_PRIVILEGE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY 6:6 /* RWXVF */
#define NV_MMU_VER2_PTE_READ_ONLY_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_READ_ONLY_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE 7:7 /* RWXVF */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER2_PTE_ATOMIC_DISABLE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_SYS 53:8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID (35-3):8 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER 35:(36-3) /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_0 0x00000000 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_1 0x00000001 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_2 0x00000002 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_3 0x00000003 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_4 0x00000004 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_5 0x00000005 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_6 0x00000006 /* RW--V */
#define NV_MMU_VER2_PTE_ADDRESS_VID_PEER_7 0x00000007 /* RW--V */
#define NV_MMU_VER2_PTE_COMPTAGLINE (20+35):36 /* RWXVF */
#define NV_MMU_VER2_PTE_KIND 63:56 /* RWXVF */
#define NV_MMU_VER2_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER2_PTE__SIZE 8
#define NV_MMU_VER3_PDE /* ----G */
#define NV_MMU_VER3_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER3_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER3_PDE_APERTURE_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF 5:3 /* RWXVF */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_PCF_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_PDE_ADDRESS 51:12 /* RWXVF */
#define NV_MMU_VER3_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_PDE__SIZE 8
#define NV_MMU_VER3_DUAL_PDE /* ----G */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE 0:0 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_IS_PTE_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG 2:1 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_BIG_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG 5:3 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_BIG_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG 51:8 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL 66:65 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_VIDEO_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_APERTURE_SMALL_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL 69:67 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED__OR__INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED 0x00000000 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED__OR__SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_ALLOWED 0x00000001 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED__OR__INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_CACHED_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED 0x00000002 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED__OR__SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_PCF_SMALL_SPARSE_ATS_NOT_ALLOWED 0x00000003 /* RW--V */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SMALL 115:76 /* RWXVF */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT 8 /* */
#define NV_MMU_VER3_DUAL_PDE__SIZE 16
#define NV_MMU_VER3_PTE /* ----G */
#define NV_MMU_VER3_PTE_VALID 0:0 /* RWXVF */
#define NV_MMU_VER3_PTE_VALID_TRUE 0x1 /* RW--V */
#define NV_MMU_VER3_PTE_VALID_FALSE 0x0 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE 2:1 /* RWXVF */
#define NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF 7:3 /* RWXVF */
#define NV_MMU_VER3_PTE_PCF_INVALID 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_SPARSE 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_MAPPING_NOWHERE 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_NO_VALID_4KB_PAGE 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACE 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACE 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACE 0x00000004 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACE 0x00000005 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACE 0x00000006 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACE 0x00000007 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE 0x00000008 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE 0x00000009 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE 0x0000000A /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE 0x0000000B /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE 0x0000000C /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000D /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE 0x0000000E /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE 0x0000000F /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD 0x00000010 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD 0x00000011 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_CACHED_ACD 0x00000012 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_ATOMIC_UNCACHED_ACD 0x00000013 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_CACHED_ACD 0x00000014 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_ATOMIC_UNCACHED_ACD 0x00000015 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_CACHED_ACD 0x00000016 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_ATOMIC_UNCACHED_ACD 0x00000017 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD 0x00000018 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD 0x00000019 /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_CACHED_ACD 0x0000001A /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACD 0x0000001B /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD 0x0000001C /* RW--V */
#define NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001D /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_CACHED_ACD 0x0000001E /* RW--V */
#define NV_MMU_VER3_PTE_PCF_PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD 0x0000001F /* RW--V */
#define NV_MMU_VER3_PTE_KIND 11:8 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_SYS 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_PEER 51:12 /* RWXVF */
#define NV_MMU_VER3_PTE_ADDRESS_VID 39:12 /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID 63:(64-3) /* RWXVF */
#define NV_MMU_VER3_PTE_PEER_ID_0 0x00000000 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_1 0x00000001 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_2 0x00000002 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_3 0x00000003 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_4 0x00000004 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_5 0x00000005 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_6 0x00000006 /* RW--V */
#define NV_MMU_VER3_PTE_PEER_ID_7 0x00000007 /* RW--V */
#define NV_MMU_VER3_PTE_ADDRESS_SHIFT 0x0000000c /* */
#define NV_MMU_VER3_PTE__SIZE 8
#define NV_MMU_CLIENT /* ----G */
#define NV_MMU_CLIENT_KIND 2:0 /* RWXVF */
#define NV_MMU_CLIENT_KIND_Z16 0x1 /* R---V */
#define NV_MMU_CLIENT_KIND_S8 0x2 /* R---V */
#define NV_MMU_CLIENT_KIND_S8Z24 0x3 /* R---V */
#define NV_MMU_CLIENT_KIND_ZF32_X24S8 0x4 /* R---V */
#define NV_MMU_CLIENT_KIND_Z24S8 0x5 /* R---V */
#define NV_MMU_CLIENT_KIND_GENERIC_MEMORY 0x6 /* R---V */
#define NV_MMU_CLIENT_KIND_INVALID 0x7 /* R---V */
#endif // __gh100_dev_mmu_h__
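As a worked example of the version-3 PTE layout above (VALID in bit 0, APERTURE in 2:1, PCF in 7:3, KIND in 11:8, the vidmem address in 39:12, with NV_MMU_VER3_PTE_ADDRESS_SHIFT of 12), the sketch below hand-packs a PTE for a 4 KB video-memory page. The function name and the open-coded shifts are assumptions for illustration only; the driver builds PTEs through its own field-access macros.

#include <stdint.h>

static uint64_t example_make_ver3_vidmem_pte(uint64_t phys_addr, uint32_t kind, uint32_t pcf)
{
    uint64_t pte = 0;

    pte |= (uint64_t)0x1 << 0;               /* NV_MMU_VER3_PTE_VALID_TRUE, bits 0:0 */
    pte |= (uint64_t)0x0 << 1;               /* NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY, bits 2:1 */
    pte |= (uint64_t)(pcf  & 0x1F) << 3;     /* NV_MMU_VER3_PTE_PCF, bits 7:3 (e.g. REGULAR_RW_ATOMIC_CACHED_ACE) */
    pte |= (uint64_t)(kind & 0x0F) << 8;     /* NV_MMU_VER3_PTE_KIND, bits 11:8 */

    /* NV_MMU_VER3_PTE_ADDRESS_VID spans bits 39:12; the stored value is the
     * physical address with its low NV_MMU_VER3_PTE_ADDRESS_SHIFT (12) bits
     * dropped, so the page must be 4 KB aligned. */
    pte |= ((phys_addr >> 12) & 0xFFFFFFFull) << 12;

    return pte;
}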

View File

@ -1,6 +1,12 @@
NVIDIA_UVM_SOURCES ?=
NVIDIA_UVM_SOURCES_CXX ?=
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_ce.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_hopper_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_ada.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_common.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_linux.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_debug_optimized.c

View File

@ -1094,7 +1094,6 @@ static int uvm_init(void)
if (uvm_enable_builtin_tests)
    pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
@ -1104,7 +1103,6 @@ static int uvm_init(void)
if (uvm_hmm_is_enabled_system_wide())
    UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
return 0;
error:
@ -1146,4 +1144,4 @@ module_exit(uvm_exit_entry);
MODULE_LICENSE("Dual MIT/GPL");
MODULE_INFO(supported, "external");
MODULE_VERSION(NV_VERSION_STRING);

View File

@ -216,12 +216,6 @@ NV_STATUS UvmDeinitialize(void);
// Note that it is not required to release VA ranges that were reserved with
// UvmReserveVa().
//
// UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
// replaces it with a new open file with the same name.
//
@ -416,14 +410,6 @@ NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
// location will have their range group association changed to
// UVM_RANGE_GROUP_ID_NONE.
//
// Arguments:
//     gpuUuid: (INPUT)
//         UUID of the GPU to unregister.
@ -1276,14 +1262,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
//
// The VA range can be unmapped and freed via a call to UvmFree.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
@ -1320,10 +1298,6 @@ NV_STATUS UvmCleanUpZombieResources(void);
//     NV_ERR_INVALID_ARGUMENT:
//         perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
//         or caching is requested on more than one GPU.
//
//     NV_ERR_NOT_SUPPORTED:
//         The current process is not the one which called UvmInitialize, and

View File

@ -0,0 +1,95 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_ada_fault_buffer.h"
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = true;
parent_gpu->tlb_batch.va_range_invalidate_supported = true;
// TODO: Bug 1767241: Run benchmarks to figure out a good number
parent_gpu->tlb_batch.max_ranges = 8;
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
}
// A single top level PDE on Ada covers 128 TB and that's the minimum size
// that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 128ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// Not all units on Ada support 49-bit addressing, including those which
// access channel buffers.
parent_gpu->max_channel_va = 1ULL << 40;
parent_gpu->max_host_va = 1ULL << 40;
// Ada can map sysmem with any page size
parent_gpu->can_map_sysmem_with_large_pages = true;
// Prefetch instructions will generate faults
parent_gpu->prefetch_fault_supported = true;
// Ada can place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = true;
parent_gpu->replayable_faults_supported = true;
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
parent_gpu->has_clear_faulted_channel_sw_method = true;
parent_gpu->has_clear_faulted_channel_method = false;
parent_gpu->smc.supported = true;
parent_gpu->sparse_mappings_supported = true;
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
}
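Written out as standalone constants, the carve-outs above look as follows (a sketch using only the values assigned in this function; 128 TB is 1ULL << 47, and 384 TB is the start of the fourth 128 TB top-level PDE slice):

// Illustrative constants only, mirroring the assignments in
// uvm_hal_ada_arch_init_properties(): RM owns the first top-level PDE and
// UVM's internal mappings start three PDE slices later.
#define EXAMPLE_ADA_PDE0_SIZE       (128ull * 1024 * 1024 * 1024 * 1024) // 1ULL << 47
#define EXAMPLE_ADA_RM_VA_BASE      0ull
#define EXAMPLE_ADA_RM_VA_SIZE      EXAMPLE_ADA_PDE0_SIZE
#define EXAMPLE_ADA_UVM_MEM_VA_BASE (3ull * EXAMPLE_ADA_PDE0_SIZE)       // 384 TB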

View File

@ -0,0 +1,84 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_ADA_FAULT_BUFFER_H__
#define __UVM_HAL_ADA_FAULT_BUFFER_H__
#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"
// There are up to 6 TPCs per GPC in Ada, and there are 2 LTP uTLBs per TPC.
// In addition, there is one RGG uTLB per GPC. Each TPC has a number of clients
// that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests from
// these units are routed as follows to the 2 LTP uTLBs:
//
// -------- ---------
// | T1_0 | -----------------> | uTLB0 |
// -------- ---------
//
// -------- ---------
// | T1_1 | -----------------> | uTLB1 |
// -------- --------> ---------
// | ^
// ------- | |
// | PE | ----------- |
// ------- |
// |
// --------- |
// | TPCCS | -----------------------
// ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1 (see
// the sketch after this header).
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
typedef enum {
UVM_ADA_GPC_UTLB_ID_RGG = 0,
UVM_ADA_GPC_UTLB_ID_LTP0 = 1,
UVM_ADA_GPC_UTLB_ID_LTP1 = 2,
UVM_ADA_GPC_UTLB_ID_LTP2 = 3,
UVM_ADA_GPC_UTLB_ID_LTP3 = 4,
UVM_ADA_GPC_UTLB_ID_LTP4 = 5,
UVM_ADA_GPC_UTLB_ID_LTP5 = 6,
UVM_ADA_GPC_UTLB_ID_LTP6 = 7,
UVM_ADA_GPC_UTLB_ID_LTP7 = 8,
UVM_ADA_GPC_UTLB_ID_LTP8 = 9,
UVM_ADA_GPC_UTLB_ID_LTP9 = 10,
UVM_ADA_GPC_UTLB_ID_LTP10 = 11,
UVM_ADA_GPC_UTLB_ID_LTP11 = 12,
UVM_ADA_GPC_UTLB_COUNT,
} uvm_ada_gpc_utlb_id_t;
static NvU32 uvm_ada_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
UVM_ASSERT(utlbs <= UVM_ADA_GPC_UTLB_COUNT);
return utlbs;
}
#endif
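As a minimal sketch of the id mapping described above (illustrative only, not part of this header; it assumes the linear scheme where TPC_n's T1 clients are T1_{2*n} and T1_{2*n+1} and that the GPC-level LTP uTLB ids follow the same order):

// Hypothetical helper: returns the uvm_ada_gpc_utlb_id_t for T1 client
// t1_index (0 or 1) of TPC tpc_index. TPC_n owns GPC-level LTP uTLBs
// 2*n and 2*n + 1, and the enum above starts the LTP ids at 1 because
// value 0 is the RGG uTLB.
static NvU32 example_ada_t1_to_ltp_utlb_id(NvU32 tpc_index, NvU32 t1_index)
{
    return UVM_ADA_GPC_UTLB_ID_LTP0 + tpc_index * 2 + t1_index;
}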

View File

@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
 Copyright (c) 2018-2021 NVIDIA Corporation Copyright (c) 2018-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -53,7 +53,7 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE; parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
// See uvm_mmu.h for mapping placement // See uvm_mmu.h for mapping placement
parent_gpu->flat_vidmem_va_base = 132ull * 1024 * 1024 * 1024 * 1024; parent_gpu->flat_vidmem_va_base = 136ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024; parent_gpu->flat_sysmem_va_base = 256ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode; parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;

View File

@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
Copyright (c) 2018-2021 NVIDIA Corporation Copyright (c) 2018-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -107,6 +107,8 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_spin_loop_t spin; uvm_spin_loop_t spin;
NvU32 channel_faulted_mask = 0; NvU32 channel_faulted_mask = 0;
NvU32 clear_type_value = 0; NvU32 clear_type_value = 0;
NvU32 doorbell_value = 0;
volatile NvU32 *doorbell_ptr;
UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method); UVM_ASSERT(!user_channel->gpu->parent->has_clear_faulted_channel_method);
@ -123,6 +125,12 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type)); uvm_mmu_engine_type_string(fault->fault_source.mmu_engine_type));
} }
doorbell_ptr = (NvU32 *)((NvU8 *)user_channel->runlist_pri_base_register + NV_RUNLIST_INTERNAL_DOORBELL);
// GFID is not required since we clear the faulted channel with a SW method on
// SRIOV. On bare metal, GFID is always zero.
doorbell_value = HWVALUE(_RUNLIST, INTERNAL_DOORBELL, CHID, user_channel->hw_channel_id);
// Wait for the channel to have the FAULTED bit set as this can race with // Wait for the channel to have the FAULTED bit set as this can race with
// interrupt notification // interrupt notification
UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin); UVM_SPIN_WHILE(!(UVM_GPU_READ_ONCE(*user_channel->chram_channel_register) & channel_faulted_mask), &spin);
@ -131,7 +139,7 @@ void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user
wmb(); wmb();
UVM_GPU_WRITE_ONCE(*user_channel->work_submission_offset, user_channel->work_submission_token); UVM_GPU_WRITE_ONCE(*doorbell_ptr, doorbell_value);
} }
static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture) static NvU32 instance_ptr_aperture_type_to_hw_value(uvm_aperture_t aperture)

View File

@ -58,12 +58,6 @@ NV_STATUS uvm_ats_add_gpu(uvm_parent_gpu_t *parent_gpu)
return uvm_ats_ibm_add_gpu(parent_gpu); return uvm_ats_ibm_add_gpu(parent_gpu);
} }
return NV_OK; return NV_OK;
} }
@ -77,12 +71,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu)
uvm_ats_ibm_remove_gpu(parent_gpu); uvm_ats_ibm_remove_gpu(parent_gpu);
} }
} }
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space) NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
@ -100,10 +88,6 @@ NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED()) if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_bind_gpu(gpu_va_space); status = uvm_ats_ibm_bind_gpu(gpu_va_space);
return status; return status;
} }
@ -116,10 +100,6 @@ void uvm_ats_unbind_gpu(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED()) if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unbind_gpu(gpu_va_space); uvm_ats_ibm_unbind_gpu(gpu_va_space);
} }
NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space) NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
@ -147,10 +127,6 @@ NV_STATUS uvm_ats_register_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED()) if (UVM_ATS_IBM_SUPPORTED())
status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space); status = uvm_ats_ibm_register_gpu_va_space(gpu_va_space);
if (status == NV_OK) if (status == NV_OK)
uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id); uvm_processor_mask_set(&va_space->ats.registered_gpu_va_spaces, gpu_id);
@ -173,10 +149,6 @@ void uvm_ats_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space)
if (UVM_ATS_IBM_SUPPORTED()) if (UVM_ATS_IBM_SUPPORTED())
uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space); uvm_ats_ibm_unregister_gpu_va_space(gpu_va_space);
uvm_va_space_down_write(va_space); uvm_va_space_down_write(va_space);
uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id); uvm_processor_mask_clear(&va_space->ats.registered_gpu_va_spaces, gpu_id);
uvm_va_space_up_write(va_space); uvm_va_space_up_write(va_space);

View File

@ -29,14 +29,8 @@
#include "uvm_ats_ibm.h" #include "uvm_ats_ibm.h"
#include "nv_uvm_types.h" #include "nv_uvm_types.h"
#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED()) #define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED())
typedef struct typedef struct
{ {
// Mask of gpu_va_spaces which are registered for ATS access. The mask is // Mask of gpu_va_spaces which are registered for ATS access. The mask is
@ -47,11 +41,6 @@ typedef struct
{ {
uvm_ibm_va_space_t ibm; uvm_ibm_va_space_t ibm;
}; };
} uvm_ats_va_space_t; } uvm_ats_va_space_t;
@ -69,11 +58,6 @@ typedef struct
{ {
uvm_ibm_gpu_va_space_t ibm; uvm_ibm_gpu_va_space_t ibm;
}; };
} uvm_ats_gpu_va_space_t; } uvm_ats_gpu_va_space_t;
@ -106,10 +90,6 @@ void uvm_ats_remove_gpu(uvm_parent_gpu_t *parent_gpu);
// LOCKING: mmap_lock must be lockable. // LOCKING: mmap_lock must be lockable.
// VA space lock must be lockable. // VA space lock must be lockable.
// gpu_va_space->gpu must be retained. // gpu_va_space->gpu must be retained.
NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space); NV_STATUS uvm_ats_bind_gpu(uvm_gpu_va_space_t *gpu_va_space);
// Decrements the refcount on the {gpu, mm} pair. Removes the binding from the // Decrements the refcount on the {gpu, mm} pair. Removes the binding from the

View File

@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -52,13 +52,13 @@ static NV_STATUS test_non_pipelined(uvm_gpu_t *gpu)
uvm_push_t push; uvm_push_t push;
bool is_proxy; bool is_proxy;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, &host_mem); status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, CE_TEST_MEM_SIZE, 0, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem); host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
memset(host_ptr, 0, CE_TEST_MEM_SIZE); memset(host_ptr, 0, CE_TEST_MEM_SIZE);
for (i = 0; i < CE_TEST_MEM_COUNT; ++i) { for (i = 0; i < CE_TEST_MEM_COUNT; ++i) {
status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, &mem[i]); status = uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, CE_TEST_MEM_SIZE, 0, &mem[i]);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
} }
@ -167,7 +167,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
uvm_push_t push; uvm_push_t push;
NvU32 value; NvU32 value;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), &host_mem); status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(NvU32), 0, &host_mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem); host_ptr = (NvU32 *)uvm_rm_mem_get_cpu_va(host_mem);
*host_ptr = 0; *host_ptr = 0;
@ -429,13 +429,13 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size); gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem // Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, &gpu_rm_mem), done); TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
is_proxy_va_space = false; is_proxy_va_space = false;
gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space); gpu_va = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va); gpu_addresses[2] = uvm_gpu_address_virtual(gpu_va);
// Virtual address (in UVM's internal address space) backed by sysmem // Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, &sys_rm_mem), done); TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space); gpu_va = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va); gpu_addresses[3] = uvm_gpu_address_virtual(gpu_va);

View File

@ -97,8 +97,11 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
if (mode == UVM_CHANNEL_UPDATE_MODE_COMPLETED && entry->tracking_semaphore_value > completed_value) if (mode == UVM_CHANNEL_UPDATE_MODE_COMPLETED && entry->tracking_semaphore_value > completed_value)
break; break;
uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry); if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos); uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
}
gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries; gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries;
++completed_count; ++completed_count;
} }
@ -150,25 +153,31 @@ NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager
return pending_gpfifos; return pending_gpfifos;
} }
static bool channel_is_available(uvm_channel_t *channel) static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{ {
NvU32 next_put; NvU32 pending_entries;
uvm_assert_spinlock_locked(&channel->pool->lock); uvm_assert_spinlock_locked(&channel->pool->lock);
next_put = (channel->cpu_put + channel->current_pushes_count + 1) % channel->num_gpfifo_entries; if (channel->cpu_put >= channel->gpu_get)
pending_entries = channel->cpu_put - channel->gpu_get;
else
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
return (next_put != channel->gpu_get); return (pending_entries + channel->current_gpfifo_count + num_gpfifo_entries < channel->num_gpfifo_entries);
} }
static bool try_claim_channel(uvm_channel_t *channel) static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{ {
bool claimed = false; bool claimed = false;
UVM_ASSERT(num_gpfifo_entries > 0);
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
uvm_spin_lock(&channel->pool->lock); uvm_spin_lock(&channel->pool->lock);
if (channel_is_available(channel)) { if (channel_is_available(channel, num_gpfifo_entries)) {
++channel->current_pushes_count; channel->current_gpfifo_count += num_gpfifo_entries;
claimed = true; claimed = true;
} }
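As a worked example of the availability check above (a standalone sketch with hypothetical values, not driver code): with 32 GPFIFO entries, cpu_put == 3 and gpu_get == 30 the put pointer has wrapped, so 3 + 32 - 30 == 5 entries are still pending; claiming 2 more entries with nothing currently claimed succeeds because 5 + 0 + 2 < 32.

// Illustrative only: the wrap-around arithmetic used by channel_is_available(),
// written as a pure function of the ring indices.
static bool example_gpfifo_space_available(NvU32 cpu_put,
                                           NvU32 gpu_get,
                                           NvU32 num_entries,
                                           NvU32 claimed,
                                           NvU32 requested)
{
    NvU32 pending = (cpu_put >= gpu_get) ? (cpu_put - gpu_get)
                                         : (cpu_put + num_entries - gpu_get);

    return pending + claimed + requested < num_entries;
}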
@ -179,26 +188,14 @@ static bool try_claim_channel(uvm_channel_t *channel)
static void lock_push(uvm_channel_t *channel) static void lock_push(uvm_channel_t *channel)
{ {
} }
static void unlock_push(uvm_channel_t *channel) static void unlock_push(uvm_channel_t *channel)
{ {
} }
static bool trylock_push(uvm_channel_t *channel) static bool trylock_push(uvm_channel_t *channel)
{ {
return true; return true;
} }
@ -212,13 +209,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
uvm_for_each_channel_in_pool(channel, pool) { uvm_for_each_channel_in_pool(channel, pool) {
// TODO: Bug 1764953: Prefer idle/less busy channels // TODO: Bug 1764953: Prefer idle/less busy channels
if (trylock_push(channel)) { if (trylock_push(channel)) {
if (try_claim_channel(channel)) { if (try_claim_channel(channel, 1)) {
*channel_out = channel; *channel_out = channel;
return NV_OK; return NV_OK;
} }
@ -235,7 +227,7 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
uvm_channel_update_progress(channel); uvm_channel_update_progress(channel);
if (try_claim_channel(channel)) { if (try_claim_channel(channel, 1)) {
lock_push(channel); lock_push(channel);
*channel_out = channel; *channel_out = channel;
@ -319,13 +311,6 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
manager = channel->pool->manager; manager = channel->pool->manager;
status = uvm_pushbuffer_begin_push(manager->pushbuffer, push); status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
if (status != NV_OK) if (status != NV_OK)
return status; return status;
@ -407,10 +392,6 @@ static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va,
if (uvm_channel_is_ce(push->channel)) if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload); gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
else else
UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n"); UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
} }
@ -447,9 +428,10 @@ void uvm_channel_end_push(uvm_push_t *push)
entry->pushbuffer_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push); entry->pushbuffer_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
entry->pushbuffer_size = push_size; entry->pushbuffer_size = push_size;
entry->push_info = &channel->push_infos[push->push_info_index]; entry->push_info = &channel->push_infos[push->push_info_index];
entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;
UVM_ASSERT(channel->current_pushes_count > 0); UVM_ASSERT(channel->current_gpfifo_count > 0);
--channel->current_pushes_count; --channel->current_gpfifo_count;
if (uvm_channel_is_proxy(channel)) if (uvm_channel_is_proxy(channel))
proxy_channel_submit_work(push, push_size); proxy_channel_submit_work(push, push_size);
@ -477,18 +459,116 @@ void uvm_channel_end_push(uvm_push_t *push)
push->channel_tracking_value = new_tracking_value; push->channel_tracking_value = new_tracking_value;
} }
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel) // The caller must submit a normal GPFIFO entry with a semaphore release
// following the control GPFIFO; refer to uvm_channel_write_ctrl_gpfifo() for an
// example.
static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
{
uvm_gpfifo_entry_t *entry;
NvU64 *gpfifo_entry;
NvU32 cpu_put;
NvU32 new_cpu_put;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_spin_lock(&channel->pool->lock);
cpu_put = channel->cpu_put;
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
entry = &channel->gpfifo_entries[cpu_put];
memset(entry, 0, sizeof(*entry));
entry->type = UVM_GPFIFO_ENTRY_TYPE_CONTROL;
// Control GPFIFO entries are followed by a semaphore_release push in UVM.
// We assign the tracking semaphore value of the next GPFIFO entry, which
// will typically be the associated semaphore release push. Even if a
// different GPFIFO entry sneaks in, the purpose of signaling that this
// control GPFIFO entry has been processed is still accomplished.
entry->tracking_semaphore_value = channel->tracking_sem.queued_value + 1;
UVM_ASSERT(channel->current_gpfifo_count > 1);
--channel->current_gpfifo_count;
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
*gpfifo_entry = ctrl_fifo_entry_value;
// Need to make sure all the GPFIFO entries writes complete before updating
// GPPUT. We also don't want any reads to be moved after the GPPut write as
// the GPU might modify the data they read as soon as the GPPut write
// happens.
mb();
gpu->parent->host_hal->write_gpu_put(channel, new_cpu_put);
channel->cpu_put = new_cpu_put;
// The moment the channel is unlocked, uvm_channel_update_progress_with_max()
// may notice that the GPU work has completed, so all state tracking the
// push must be updated before the unlock.
uvm_spin_unlock(&channel->pool->lock);
unlock_push(channel);
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
// issues on some systems. Comment from CUDA: "fixes throughput-related
// performance problems, e.g. bugs 626179, 593841. This may be related to
// bug 124888, which GL works around by doing a clflush"
wmb();
}
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
{
NV_STATUS status;
uvm_gpu_t *gpu = channel->pool->manager->gpu;
uvm_push_t push;
UVM_ASSERT(!uvm_channel_is_proxy(channel));
// We reserve two GPFIFO entries, i.e., the control GPFIFO entry and the
// subsequent semaphore_release push. There is, however, a potential case
// for GPFIFO control submission starvation. This may happen because a
// GPFIFO control submission requires two available GPFIFO entries. If you
// have a full GPFIFO ring buffer that frees one entry at a time, while
// there is another thread consuming this recently released entry at the
// same rate, a concurrent thread trying to reserve two entries for a GPFIFO
// control submission may starve. We could handle this by enforcing a minimum
// number of entries released per call to uvm_channel_update_progress(). Instead,
// we don't deal with this potential starvation case because:
// - Control GPFIFO entries are rarely used.
// - By default, we release up to 8 GPFIFO entries at a time, except if the
// release rate is constrained by lengthy pushbuffers -- another rare
// situation.
// - It would add unnecessary complexity to channel_update_progress().
status = uvm_channel_reserve(channel, 2);
if (status != NV_OK)
return status;
write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);
status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
// This is an empty push; the push's embedded semaphore_release signals that
// the GPFIFO control entry has been processed.
uvm_push_end(&push);
return NV_OK;
}
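A minimal caller-side sketch (assuming a retained, non-proxy channel; the helper name is hypothetical, while set_gpfifo_noop() and uvm_channel_get_gpu() are the host HAL and channel accessors used elsewhere in this change):

// Illustrative only: submit a no-op control GPFIFO entry and return the
// submission status. uvm_channel_write_ctrl_gpfifo() reserves the two
// required GPFIFO entries internally, as described above.
static NV_STATUS example_submit_noop_ctrl_entry(uvm_channel_t *channel)
{
    NvU64 entry;
    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);

    gpu->parent->host_hal->set_gpfifo_noop(&entry);

    return uvm_channel_write_ctrl_gpfifo(channel, entry);
}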
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{ {
NV_STATUS status = NV_OK; NV_STATUS status = NV_OK;
uvm_spin_loop_t spin; uvm_spin_loop_t spin;
if (try_claim_channel(channel)) if (try_claim_channel(channel, num_gpfifo_entries))
goto out; goto out;
uvm_channel_update_progress(channel); uvm_channel_update_progress(channel);
uvm_spin_loop_init(&spin); uvm_spin_loop_init(&spin);
while (!try_claim_channel(channel) && status == NV_OK) { while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
UVM_SPIN_LOOP(&spin); UVM_SPIN_LOOP(&spin);
status = uvm_channel_check_errors(channel); status = uvm_channel_check_errors(channel);
uvm_channel_update_progress(channel); uvm_channel_update_progress(channel);
@ -568,12 +648,28 @@ NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
fatal_entry = uvm_channel_get_fatal_entry(channel); fatal_entry = uvm_channel_get_fatal_entry(channel);
if (fatal_entry != NULL) { if (fatal_entry != NULL) {
uvm_push_info_t *push_info = fatal_entry->push_info; if (fatal_entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n", uvm_push_info_t *push_info = fatal_entry->push_info;
push_info->description, push_info->filename, push_info->line, push_info->function); UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
push_info->description,
push_info->filename,
push_info->line,
push_info->function);
}
else {
NvU64 *gpfifo_entry;
UVM_ASSERT(!uvm_channel_is_proxy(channel));
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + (fatal_entry - channel->gpfifo_entries);
UVM_ERR_PRINT("Channel error likely caused by GPFIFO control entry, data: 0x%llx, gpu_get: %d\n",
*gpfifo_entry,
channel->gpu_get);
}
} }
uvm_global_set_fatal_error(status); uvm_global_set_fatal_error(status);
return status; return status;
} }
@ -608,40 +704,6 @@ NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem); return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
} }
static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel) static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{ {
UVM_ASSERT(pool->num_channels > 0); UVM_ASSERT(pool->num_channels > 0);
@ -667,10 +729,6 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
uvm_kvfree(channel->gpfifo_entries); uvm_kvfree(channel->gpfifo_entries);
if (uvm_channel_is_proxy(channel)) if (uvm_channel_is_proxy(channel))
uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle)); uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle));
else else
@ -694,11 +752,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
if (uvm_channel_is_ce(channel)) { if (uvm_channel_is_ce(channel)) {
UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE); UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
} }
memset(&channel_alloc_params, 0, sizeof(channel_alloc_params)); memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
@ -710,10 +763,6 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
if (uvm_channel_is_ce(channel)) if (uvm_channel_is_ce(channel))
channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE; channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;
status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space, status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
&channel_alloc_params, &channel_alloc_params,
&channel->handle, &channel->handle,
@ -732,11 +781,7 @@ static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine
channel_info->hwChannelId, channel_info->hwChannelId,
channel_info->hwRunlistId, channel_info->hwRunlistId,
channel_info->hwChannelId, channel_info->hwChannelId,
"CE", "CE",
engine_index); engine_index);
return NV_OK; return NV_OK;
@ -803,12 +848,6 @@ static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel
if (status != NV_OK) if (status != NV_OK)
goto error; goto error;
channel->num_gpfifo_entries = manager->conf.num_gpfifo_entries; channel->num_gpfifo_entries = manager->conf.num_gpfifo_entries;
channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries); channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
if (channel->gpfifo_entries == NULL) { if (channel->gpfifo_entries == NULL) {
@ -858,8 +897,25 @@ static NV_STATUS init_channel(uvm_channel_t *channel)
{ {
uvm_push_t push; uvm_push_t push;
uvm_gpu_t *gpu = uvm_channel_get_gpu(channel); uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
NV_STATUS status = uvm_push_begin_on_channel(channel, &push, "Init channel"); NV_STATUS status;
NvU32 num_entries = 1;
if (uvm_gpu_has_pushbuffer_segments(gpu))
num_entries++;
status = uvm_channel_reserve(channel, num_entries);
if (status != NV_OK)
return status;
if (uvm_gpu_has_pushbuffer_segments(gpu)) {
NvU64 gpfifo_entry;
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
gpu->parent->host_hal->set_gpfifo_pushbuffer_segment_base(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_base(pushbuffer));
write_ctrl_gpfifo(channel, gpfifo_entry);
}
status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
if (status != NV_OK) { if (status != NV_OK) {
UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu)); UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status; return status;
@ -868,10 +924,6 @@ static NV_STATUS init_channel(uvm_channel_t *channel)
if (uvm_channel_is_ce(channel)) if (uvm_channel_is_ce(channel))
gpu->parent->ce_hal->init(&push); gpu->parent->ce_hal->init(&push);
gpu->parent->host_hal->init(&push); gpu->parent->host_hal->init(&push);
status = uvm_push_end_and_wait(&push); status = uvm_push_end_and_wait(&push);
@ -1348,12 +1400,6 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
num_channel_pools++; num_channel_pools++;
manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools); manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
if (!manager->channel_pools) if (!manager->channel_pools)
return NV_ERR_NO_MEMORY; return NV_ERR_NO_MEMORY;
@ -1384,17 +1430,6 @@ static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
manager->pool_to_use.default_for_type[channel_type] = pool; manager->pool_to_use.default_for_type[channel_type] = pool;
} }
return NV_OK; return NV_OK;
} }
@ -1520,39 +1555,25 @@ uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 poo
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type) const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)
{ {
BUILD_BUG_ON(UVM_CHANNEL_TYPE_COUNT != 5); BUILD_BUG_ON(UVM_CHANNEL_TYPE_COUNT != 5);
switch (channel_type) { switch (channel_type) {
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_CPU_TO_GPU); UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_CPU_TO_GPU);
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_CPU); UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_CPU);
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_INTERNAL); UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_INTERNAL);
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_MEMOPS); UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_MEMOPS);
UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_GPU); UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_GPU);
UVM_ENUM_STRING_DEFAULT(); UVM_ENUM_STRING_DEFAULT();
} }
} }
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type) const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
{ {
BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 2); BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 2);
switch (channel_pool_type) { switch (channel_pool_type) {
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE); UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE);
UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE_PROXY); UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE_PROXY);
UVM_ENUM_STRING_DEFAULT(); UVM_ENUM_STRING_DEFAULT();
} }
} }
@ -1630,26 +1651,41 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
if (entry->tracking_semaphore_value + finished_pushes_count <= completed_value) if (entry->tracking_semaphore_value + finished_pushes_count <= completed_value)
continue; continue;
// Obtain the value acquire tracking information from the push_info index if (entry->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
if (uvm_push_info_is_tracking_acquires()) { NvU64 *gpfifo_entry;
NvU32 push_info_index = push_info - channel->push_infos;
UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
push_acquire_info = &channel->push_acquire_infos[push_info_index]; UVM_ASSERT(!uvm_channel_is_proxy(channel));
gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + gpu_get;
UVM_SEQ_OR_DBG_PRINT(seq,
" control GPFIFO entry - data: 0x%llx, gpu_get: %d\n",
*gpfifo_entry,
gpu_get);
} }
else {
UVM_SEQ_OR_DBG_PRINT(seq, // Obtain the value acquire tracking information from the push_info
" %s push '%s' started at %s:%d in %s() releasing value %llu%s", // index
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending", if (uvm_push_info_is_tracking_acquires()) {
push_info->description, NvU32 push_info_index = push_info - channel->push_infos;
push_info->filename, UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);
push_info->line,
push_info->function,
entry->tracking_semaphore_value,
!push_acquire_info || push_acquire_info->num_values == 0? "\n" : "");
if (push_acquire_info) push_acquire_info = &channel->push_acquire_infos[push_info_index];
channel_print_push_acquires(push_acquire_info, seq); }
UVM_SEQ_OR_DBG_PRINT(seq,
" %s push '%s' started at %s:%d in %s() releasing value %llu%s",
entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
push_info->description,
push_info->filename,
push_info->line,
push_info->function,
entry->tracking_semaphore_value,
!push_acquire_info || push_acquire_info->num_values == 0 ? "\n" : "");
if (push_acquire_info)
channel_print_push_acquires(push_acquire_info, seq);
}
} }
uvm_spin_unlock(&channel->pool->lock); uvm_spin_unlock(&channel->pool->lock);
} }
@ -1694,7 +1730,7 @@ static int nv_procfs_read_manager_pending_pushes(struct seq_file *s, void *v)
uvm_channel_manager_t *manager = (uvm_channel_manager_t *)s->private; uvm_channel_manager_t *manager = (uvm_channel_manager_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN; return -EAGAIN;
channel_manager_print_pending_pushes(manager, s); channel_manager_print_pending_pushes(manager, s);
@ -1733,7 +1769,7 @@ static int nv_procfs_read_channel_info(struct seq_file *s, void *v)
uvm_channel_t *channel = (uvm_channel_t *)s->private; uvm_channel_t *channel = (uvm_channel_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN; return -EAGAIN;
uvm_channel_print_info(channel, s); uvm_channel_print_info(channel, s);
@ -1754,7 +1790,7 @@ static int nv_procfs_read_channel_pushes(struct seq_file *s, void *v)
uvm_channel_t *channel = (uvm_channel_t *)s->private; uvm_channel_t *channel = (uvm_channel_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN; return -EAGAIN;
// Include up to 5 finished pushes for some context // Include up to 5 finished pushes for some context
channel_print_pushes(channel, 5, s); channel_print_pushes(channel, 5, s);

View File

@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -83,18 +83,7 @@ typedef enum
// ^^^^^^ // ^^^^^^
// Channel types backed by a CE. // Channel types backed by a CE.
UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT, UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
} uvm_channel_type_t; } uvm_channel_type_t;
typedef enum typedef enum
@ -112,34 +101,43 @@ typedef enum
// There is a single proxy pool and channel per GPU. // There is a single proxy pool and channel per GPU.
UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1), UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),
UVM_CHANNEL_POOL_TYPE_COUNT = 2, UVM_CHANNEL_POOL_TYPE_COUNT = 2,
// A mask used to select pools of any type. // A mask used to select pools of any type.
UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1) UVM_CHANNEL_POOL_TYPE_MASK = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
} uvm_channel_pool_type_t; } uvm_channel_pool_type_t;
typedef enum
{
// Push-based GPFIFO entry
UVM_GPFIFO_ENTRY_TYPE_NORMAL,
// Control GPFIFO entry, i.e., the LENGTH field is zero, not associated with
// a push.
UVM_GPFIFO_ENTRY_TYPE_CONTROL
} uvm_gpfifo_entry_type_t;
struct uvm_gpfifo_entry_struct struct uvm_gpfifo_entry_struct
{ {
// Offset of the pushbuffer in the pushbuffer allocation used by this entry uvm_gpfifo_entry_type_t type;
// Channel tracking semaphore value that indicates completion of
// this entry.
NvU64 tracking_semaphore_value;
// The following fields are only valid when type is
// UVM_GPFIFO_ENTRY_TYPE_NORMAL.
// Offset of the pushbuffer in the pushbuffer allocation used by
// this entry.
NvU32 pushbuffer_offset; NvU32 pushbuffer_offset;
// Size of the pushbuffer used for this entry // Size of the pushbuffer used for this entry.
NvU32 pushbuffer_size; NvU32 pushbuffer_size;
// List node used by the pushbuffer tracking // List node used by the pushbuffer tracking
struct list_head pending_list_node; struct list_head pending_list_node;
// Channel tracking semaphore value that indicates completion of this entry
NvU64 tracking_semaphore_value;
// Push info for the pending push that used this GPFIFO entry // Push info for the pending push that used this GPFIFO entry
uvm_push_info_t *push_info; uvm_push_info_t *push_info;
}; };
@ -193,10 +191,10 @@ struct uvm_channel_struct
// for completion. // for completion.
NvU32 gpu_get; NvU32 gpu_get;
// Number of currently on-going pushes on this channel // Number of currently on-going gpfifo entries on this channel
// A new push is only allowed to begin on the channel if there is a free // A new push or control GPFIFO is only allowed to begin on the channel if
// GPFIFO entry for it. // there is a free GPFIFO entry for it.
NvU32 current_pushes_count; NvU32 current_gpfifo_count;
// Array of uvm_push_info_t for all pending pushes on the channel // Array of uvm_push_info_t for all pending pushes on the channel
uvm_push_info_t *push_infos; uvm_push_info_t *push_infos;
@ -211,30 +209,10 @@ struct uvm_channel_struct
// been marked as completed. // been marked as completed.
struct list_head available_push_infos; struct list_head available_push_infos;
// GPU tracking semaphore tracking the work in the channel // GPU tracking semaphore tracking the work in the channel.
// Each push on the channel increments the semaphore, see // Each push on the channel increments the semaphore, see
// uvm_channel_end_push(). // uvm_channel_end_push().
uvm_gpu_tracking_semaphore_t tracking_sem; uvm_gpu_tracking_semaphore_t tracking_sem;
// RM channel information // RM channel information
union union
@ -343,14 +321,6 @@ static bool uvm_channel_is_ce(uvm_channel_t *channel)
return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel); return (channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_is_proxy(channel);
} }
// Proxy channels are used to push page tree related methods, so their channel // Proxy channels are used to push page tree related methods, so their channel
// type is UVM_CHANNEL_TYPE_MEMOPS. // type is UVM_CHANNEL_TYPE_MEMOPS.
static uvm_channel_type_t uvm_channel_proxy_channel_type(void) static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
@ -437,8 +407,8 @@ NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
uvm_gpu_t *dst_gpu, uvm_gpu_t *dst_gpu,
uvm_channel_t **channel_out); uvm_channel_t **channel_out);
// Reserve a specific channel for a push // Reserve a specific channel for a push or for a control GPFIFO entry.
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel); NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);
// Set optimal CE for P2P transfers between manager->gpu and peer // Set optimal CE for P2P transfers between manager->gpu and peer
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce); void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);
@ -451,6 +421,17 @@ NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);
// Should be used by uvm_push_end() only. // Should be used by uvm_push_end() only.
void uvm_channel_end_push(uvm_push_t *push); void uvm_channel_end_push(uvm_push_t *push);
// Write/send a control GPFIFO to channel. This is not supported by proxy
// channels.
// Ordering guarantees:
// Input ordering: Control GPFIFO entries are guaranteed to be processed by ESCHED after
// all prior GPFIFO entries and pushbuffers have been fetched, but not
// necessarily completed.
// Output ordering: A caller can wait for this control entry to complete with
// uvm_channel_manager_wait(), or by waiting for any later push in the same
// channel to complete.
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value);
const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type); const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type); const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);

View File

@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -60,7 +60,7 @@ static NV_STATUS test_ordering(uvm_va_space_t *va_space)
gpu = uvm_va_space_find_first_gpu(va_space); gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL); TEST_CHECK_RET(gpu != NULL);
status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, &mem); status = uvm_rm_mem_alloc_and_map_all(gpu, UVM_RM_MEM_TYPE_SYS, buffer_size, 0, &mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem); host_mem = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
@ -306,7 +306,6 @@ static NV_STATUS test_rc(uvm_va_space_t *va_space)
return NV_OK; return NV_OK;
} }
typedef struct typedef struct
{ {
uvm_push_t push; uvm_push_t push;
@ -481,12 +480,14 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
status = uvm_rm_mem_alloc_and_map_all(gpu, status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS, UVM_RM_MEM_TYPE_SYS,
MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32), MAX_COUNTER_REPEAT_COUNT * sizeof(NvU32),
0,
&stream->counter_mem); &stream->counter_mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
status = uvm_rm_mem_alloc_and_map_all(gpu, status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS, UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream), TEST_SNAPSHOT_SIZE(iterations_per_stream),
0,
&stream->counter_snapshots_mem); &stream->counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
@ -495,6 +496,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
status = uvm_rm_mem_alloc_and_map_all(gpu, status = uvm_rm_mem_alloc_and_map_all(gpu,
UVM_RM_MEM_TYPE_SYS, UVM_RM_MEM_TYPE_SYS,
TEST_SNAPSHOT_SIZE(iterations_per_stream), TEST_SNAPSHOT_SIZE(iterations_per_stream),
0,
&stream->other_stream_counter_snapshots_mem); &stream->other_stream_counter_snapshots_mem);
TEST_CHECK_GOTO(status == NV_OK, done); TEST_CHECK_GOTO(status == NV_OK, done);
@ -565,6 +567,7 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
stream->counter_snapshots_mem, stream->counter_snapshots_mem,
i, i,
stream->queued_counter_repeat); stream->queued_counter_repeat);
// Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters // Set a random number [2, MAX_COUNTER_REPEAT_COUNT] of counters
stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT); stream->queued_counter_repeat = uvm_test_rng_range_32(&rng, 2, MAX_COUNTER_REPEAT_COUNT);
set_counter(&stream->push, set_counter(&stream->push,
@ -669,61 +672,215 @@ done:
return status; return status;
} }
NV_STATUS test_write_ctrl_gpfifo_noop(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_manager_t *manager = gpu->channel_manager;
uvm_channel_pool_t *pool;
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
if (uvm_channel_is_proxy(channel))
continue;
// We submit 8x the channel's GPFIFO entries to force a few
// complete loops in the GPFIFO circular buffer.
for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
NvU64 entry;
gpu->parent->host_hal->set_gpfifo_noop(&entry);
TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
}
}
}
}
return NV_OK;
}
NV_STATUS test_write_ctrl_gpfifo_and_pushes(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_manager_t *manager = gpu->channel_manager;
uvm_channel_pool_t *pool;
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
uvm_push_t push;
if (uvm_channel_is_proxy(channel))
continue;
// We submit 8x the channel's GPFIFO entries to force a few
// complete loops in the GPFIFO circular buffer.
for (i = 0; i < 8 * channel->num_gpfifo_entries; i++) {
if (i % 2 == 0) {
NvU64 entry;
gpu->parent->host_hal->set_gpfifo_noop(&entry);
TEST_NV_CHECK_RET(uvm_channel_write_ctrl_gpfifo(channel, entry));
}
else {
TEST_NV_CHECK_RET(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl and push test"));
uvm_push_end(&push);
}
}
TEST_NV_CHECK_RET(uvm_push_wait(&push));
}
}
}
return NV_OK;
}
static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
{
NvU32 pending_entries;
uvm_spin_lock(&channel->pool->lock);
if (channel->cpu_put >= channel->gpu_get)
pending_entries = channel->cpu_put - channel->gpu_get;
else
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
uvm_spin_unlock(&channel->pool->lock);
return channel->num_gpfifo_entries - pending_entries - 1;
}
NV_STATUS test_write_ctrl_gpfifo_tight(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu;
uvm_channel_t *channel;
uvm_rm_mem_t *mem;
NvU32 *cpu_ptr;
NvU64 gpu_va;
NvU32 i;
NvU64 entry;
uvm_push_t push;
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_manager_t *manager = gpu->channel_manager;
gpu = manager->gpu;
TEST_NV_CHECK_RET(uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, sizeof(*cpu_ptr), 0, &mem));
cpu_ptr = uvm_rm_mem_get_cpu_va(mem);
gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);
*cpu_ptr = 0;
// This semaphore acquire takes 1 GPFIFO entry.
TEST_NV_CHECK_GOTO(uvm_push_begin(manager, UVM_CHANNEL_TYPE_GPU_TO_GPU, &push, "gpfifo ctrl tight test acq"),
error);
channel = push.channel;
UVM_ASSERT(!uvm_channel_is_proxy(channel));
gpu->parent->host_hal->semaphore_acquire(&push, gpu_va, 1);
uvm_push_end(&push);
// Populate the remaining GPFIFO entries, leaving 2 slots available:
// 2 available entries + 1 semaphore acquire (above) + 1 spare entry to
// indicate a terminal condition for the GPFIFO ring buffer, so we push
// num_gpfifo_entries - 4 entries (see the worked example after this function).
for (i = 0; i < channel->num_gpfifo_entries - 4; i++) {
TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "gpfifo ctrl tight test populate"), error);
uvm_push_end(&push);
}
TEST_CHECK_GOTO(get_available_gpfifo_entries(channel) == 2, error);
// We should have room for the control GPFIFO and the subsequent
// semaphore release.
gpu->parent->host_hal->set_gpfifo_noop(&entry);
TEST_NV_CHECK_GOTO(uvm_channel_write_ctrl_gpfifo(channel, entry), error);
// Release the semaphore.
UVM_WRITE_ONCE(*cpu_ptr, 1);
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
uvm_rm_mem_free(mem);
}
return NV_OK;
error:
uvm_rm_mem_free(mem);
return status;
}
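To make the entry accounting concrete (a worked example assuming a ring of 32 GPFIFO entries; the test itself works for any num_gpfifo_entries):

// With num_gpfifo_entries == 32, at most 31 entries can be outstanding because
// one slot always stays empty as the ring buffer's terminal condition. The
// semaphore acquire push takes 1 entry and the populate loop pushes
// num_gpfifo_entries - 4 == 28 entries, leaving 31 - 1 - 28 == 2 entries:
// exactly enough for the control GPFIFO entry plus its semaphore release push.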
// This test is inspired by test_rc() above.
// The test recreates the GPU's channel manager, forcing its pushbuffer to be
// mapped on a non-zero 1TB segment. This exercises work submission from
// pushbuffers whose VAs are greater than 1TB.
static NV_STATUS test_channel_pushbuffer_extension_base(uvm_va_space_t *va_space)
{
uvm_gpu_t *gpu;
NV_STATUS status = NV_OK;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_va_space_gpu(gpu, va_space) {
uvm_channel_manager_t *manager;
uvm_channel_pool_t *pool;
if (!uvm_gpu_has_pushbuffer_segments(gpu))
continue;
// The GPU channel manager pushbuffer is destroyed and then re-created
// after testing a non-zero pushbuffer extension base, so this test
// requires exclusive access to the GPU.
TEST_CHECK_RET(uvm_gpu_retained_count(gpu) == 1);
gpu->uvm_test_force_upper_pushbuffer_segment = 1;
uvm_channel_manager_destroy(gpu->channel_manager);
TEST_NV_CHECK_GOTO(uvm_channel_manager_create(gpu, &gpu->channel_manager), error);
gpu->uvm_test_force_upper_pushbuffer_segment = 0;
manager = gpu->channel_manager;
TEST_CHECK_GOTO(uvm_pushbuffer_get_gpu_va_base(manager->pushbuffer) >= (1ull << 40), error);
// Submit a few pushes with the recently allocated
// channel_manager->pushbuffer.
uvm_for_each_pool(pool, manager) {
uvm_channel_t *channel;
uvm_for_each_channel_in_pool(channel, pool) {
NvU32 i;
uvm_push_t push;
for (i = 0; i < channel->num_gpfifo_entries; i++) {
TEST_NV_CHECK_GOTO(uvm_push_begin_on_channel(channel, &push, "pushbuffer extension push test"),
error);
uvm_push_end(&push);
}
TEST_NV_CHECK_GOTO(uvm_push_wait(&push), error);
}
}
}
return NV_OK;
error:
gpu->uvm_test_force_upper_pushbuffer_segment = 0;
return status;
}
NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp) NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct file *filp)
{ {
@ -737,11 +894,23 @@ NV_STATUS uvm_test_channel_sanity(UVM_TEST_CHANNEL_SANITY_PARAMS *params, struct
if (status != NV_OK) if (status != NV_OK)
goto done; goto done;
status = test_write_ctrl_gpfifo_noop(va_space);
if (status != NV_OK)
goto done;
status = test_write_ctrl_gpfifo_and_pushes(va_space);
if (status != NV_OK)
goto done;
status = test_write_ctrl_gpfifo_tight(va_space);
if (status != NV_OK)
goto done;
// The following tests have side effects, they reset the GPU's
// channel_manager.
status = test_channel_pushbuffer_extension_base(va_space);
if (status != NV_OK)
goto done;
g_uvm_global.disable_fatal_error_assert = true; g_uvm_global.disable_fatal_error_assert = true;
uvm_release_asserts_set_global_error_for_tests = true; uvm_release_asserts_set_global_error_for_tests = true;

View File

@ -35,9 +35,6 @@ typedef struct uvm_ce_hal_struct uvm_ce_hal_t;
typedef struct uvm_arch_hal_struct uvm_arch_hal_t; typedef struct uvm_arch_hal_struct uvm_arch_hal_t;
typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t; typedef struct uvm_fault_buffer_hal_struct uvm_fault_buffer_hal_t;
typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t; typedef struct uvm_access_counter_buffer_hal_struct uvm_access_counter_buffer_hal_t;
typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t; typedef struct uvm_gpu_semaphore_struct uvm_gpu_semaphore_t;
typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t; typedef struct uvm_gpu_tracking_semaphore_struct uvm_gpu_tracking_semaphore_t;
typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t; typedef struct uvm_gpu_semaphore_pool_struct uvm_gpu_semaphore_pool_t;

View File

@ -71,12 +71,10 @@ static void uvm_unregister_callbacks(void)
} }
} }
static void sev_init(const UvmPlatformInfo *platform_info)
{
g_uvm_global.sev_enabled = platform_info->sevEnabled;
}
NV_STATUS uvm_global_init(void) NV_STATUS uvm_global_init(void)
{ {
@ -127,9 +125,7 @@ NV_STATUS uvm_global_init(void)
uvm_ats_init(&platform_info); uvm_ats_init(&platform_info);
g_uvm_global.num_simulated_devices = 0; g_uvm_global.num_simulated_devices = 0;
sev_init(&platform_info);
status = uvm_gpu_init(); status = uvm_gpu_init();
if (status != NV_OK) { if (status != NV_OK) {

View File

@ -143,12 +143,11 @@ struct uvm_global_struct
struct page *page; struct page *page;
} unload_state; } unload_state;
// AMD Secure Encrypted Virtualization (SEV) status. True if VM has SEV
// enabled. This field is set once during global initialization
// (uvm_global_init), and can be read afterwards without acquiring any
// locks.
bool sev_enabled;
}; };
// Initialize global uvm state // Initialize global uvm state

View File

@ -42,9 +42,6 @@
#include "uvm_ats.h" #include "uvm_ats.h"
#include "uvm_test.h" #include "uvm_test.h"
#include "uvm_linux.h" #include "uvm_linux.h"
#define UVM_PROC_GPUS_PEER_DIR_NAME "peers" #define UVM_PROC_GPUS_PEER_DIR_NAME "peers"
@ -95,12 +92,10 @@ static uvm_gpu_link_type_t get_gpu_link_type(UVM_LINK_TYPE link_type)
return UVM_GPU_LINK_NVLINK_2; return UVM_GPU_LINK_NVLINK_2;
case UVM_LINK_TYPE_NVLINK_3: case UVM_LINK_TYPE_NVLINK_3:
return UVM_GPU_LINK_NVLINK_3; return UVM_GPU_LINK_NVLINK_3;
case UVM_LINK_TYPE_NVLINK_4:
return UVM_GPU_LINK_NVLINK_4;
case UVM_LINK_TYPE_C2C:
return UVM_GPU_LINK_C2C;
default: default:
return UVM_GPU_LINK_INVALID; return UVM_GPU_LINK_INVALID;
} }
@ -356,11 +351,7 @@ static const char *uvm_gpu_virt_type_string(UVM_VIRT_MODE virtMode)
static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type) static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
{ {
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 7);
BUILD_BUG_ON(UVM_GPU_LINK_MAX != 5);
switch (link_type) { switch (link_type) {
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID); UVM_ENUM_STRING_CASE(UVM_GPU_LINK_INVALID);
@ -368,10 +359,8 @@ static const char *uvm_gpu_link_type_string(uvm_gpu_link_type_t link_type)
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1); UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_1);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2); UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_2);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3); UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_3);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_NVLINK_4);
UVM_ENUM_STRING_CASE(UVM_GPU_LINK_C2C);
UVM_ENUM_STRING_DEFAULT(); UVM_ENUM_STRING_DEFAULT();
} }
} }
@ -519,12 +508,6 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu_info_print_ce_caps(gpu, s); gpu_info_print_ce_caps(gpu, s);
} }
static void static void
@ -1038,15 +1021,6 @@ static NV_STATUS init_parent_gpu(uvm_parent_gpu_t *parent_gpu,
return status; return status;
} }
parent_gpu->pci_dev = gpu_platform_info->pci_dev; parent_gpu->pci_dev = gpu_platform_info->pci_dev;
parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev); parent_gpu->closest_cpu_numa_node = dev_to_node(&parent_gpu->pci_dev->dev);
parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start; parent_gpu->dma_addressable_start = gpu_platform_info->dma_addressable_start;
@ -1208,16 +1182,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status; return status;
} }
status = init_procfs_files(gpu); status = init_procfs_files(gpu);
if (status != NV_OK) { if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu)); UVM_ERR_PRINT("Failed to init procfs files: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
@@ -1393,10 +1357,6 @@ static void remove_gpus_from_gpu(uvm_gpu_t *gpu)
// Sync all trackers in PMM
uvm_pmm_gpu_sync(&gpu->pmm);
}
// Remove all references to the given GPU from its parent, since it is being
@@ -1530,13 +1490,6 @@ static void remove_gpu(uvm_gpu_t *gpu)
if (free_parent)
destroy_nvlink_peers(gpu);
// TODO: Bug 2844714: If the parent is not being freed, the following
// gpu_table_lock is only needed to protect concurrent
// find_first_valid_gpu() in BH from the __clear_bit here. After
@@ -2212,16 +2165,12 @@ static NV_STATUS init_peer_access(uvm_gpu_t *gpu0,
{
NV_STATUS status;
UVM_ASSERT(p2p_caps_params->p2pLink != UVM_LINK_TYPE_C2C);
// check for peer-to-peer compatibility (PCI-E or NvLink).
peer_caps->link_type = get_gpu_link_type(p2p_caps_params->p2pLink);
if (peer_caps->link_type == UVM_GPU_LINK_INVALID
|| peer_caps->link_type == UVM_GPU_LINK_C2C
)
return NV_ERR_NOT_SUPPORTED;


@@ -46,9 +46,6 @@
#include "uvm_rb_tree.h"
#include "nv-kthread-q.h"
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
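// The template above corresponds to strings of the form "ID <id>: <name>: <uuid>",
// sized on the assumption that the numeric id needs at most three digits.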
@@ -508,10 +505,8 @@ typedef enum
UVM_GPU_LINK_NVLINK_1,
UVM_GPU_LINK_NVLINK_2,
UVM_GPU_LINK_NVLINK_3,
UVM_GPU_LINK_NVLINK_4,
UVM_GPU_LINK_C2C,
UVM_GPU_LINK_MAX
} uvm_gpu_link_type_t;
@@ -684,10 +679,6 @@ struct uvm_gpu_struct
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
// ECC handling
// In order to trap ECC errors as soon as possible the driver has the hw
// interrupt register mapped directly. If an ECC interrupt is ever noticed
@@ -742,6 +733,9 @@ struct uvm_gpu_struct
// Placeholder for per-GPU performance heuristics information
uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
// Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
bool uvm_test_force_upper_pushbuffer_segment;
};
struct uvm_parent_gpu_struct
@@ -823,9 +817,6 @@ struct uvm_parent_gpu_struct
uvm_fault_buffer_hal_t *fault_buffer_hal;
uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
uvm_gpu_peer_copy_mode_t peer_copy_mode;
// Virtualization mode of the GPU.
@@ -1360,14 +1351,13 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
{
return gpu->parent->max_host_va > (1ull << 40);
}
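// (1ull << 40) is 1 TiB; earlier architectures are presumed to report a 40-bit host VA,
// so this check effectively selects Hopper-class GPUs, whose max_host_va is set to
// 1ull << 57 (see uvm_hal_hopper_arch_init_properties in this change).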
static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
{
// Eviction is supported only if the GPU supports replayable faults
return gpu->parent->replayable_faults_supported;
}


@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -134,7 +134,11 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
pool_page->pool = pool;
status = uvm_rm_mem_alloc_and_map_all(pool->gpu, UVM_RM_MEM_TYPE_SYS, UVM_SEMAPHORE_PAGE_SIZE, &pool_page->memory);
status = uvm_rm_mem_alloc_and_map_all(pool->gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_SEMAPHORE_PAGE_SIZE,
0,
&pool_page->memory);
if (status != NV_OK)
goto error;
@@ -321,7 +325,7 @@ NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu
uvm_mutex_lock(&pool->mutex);
list_for_each_entry(page, &pool->pages, all_pages_node) {
status = uvm_rm_mem_map_gpu(page->memory, gpu);
status = uvm_rm_mem_map_gpu(page->memory, gpu, 0);
if (status != NV_OK)
goto done;
}


@ -41,10 +41,8 @@
#include "clc6b5.h" #include "clc6b5.h"
#include "clc56f.h" #include "clc56f.h"
#include "clc7b5.h" #include "clc7b5.h"
#include "clc86f.h"
#include "clc8b5.h"
#define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *)) #define CE_OP_COUNT (sizeof(uvm_ce_hal_t) / sizeof(void *))
#define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *)) #define HOST_OP_COUNT (sizeof(uvm_host_hal_t) / sizeof(void *))
@ -52,9 +50,6 @@
#define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *)) #define FAULT_BUFFER_OP_COUNT (sizeof(uvm_fault_buffer_hal_t) / sizeof(void *))
#define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *)) #define ACCESS_COUNTER_BUFFER_OP_COUNT (sizeof(uvm_access_counter_buffer_hal_t) / sizeof(void *))
// Table for copy engine functions. // Table for copy engine functions.
// Each entry is associated with a copy engine class through the 'class' field. // Each entry is associated with a copy engine class through the 'class' field.
// By setting the 'parent_class' field, a class will inherit the parent class's // By setting the 'parent_class' field, a class will inherit the parent class's
@ -133,22 +128,20 @@ static uvm_hal_class_ops_t ce_table[] =
.memset_validate = uvm_hal_ce_memset_validate_stub, .memset_validate = uvm_hal_ce_memset_validate_stub,
}, },
}, },
{
.id = HOPPER_DMA_COPY_A,
.parent_id = AMPERE_DMA_COPY_B,
.u.ce_ops = {
.semaphore_release = uvm_hal_hopper_ce_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_ce_semaphore_timestamp,
.semaphore_reduction_inc = uvm_hal_hopper_ce_semaphore_reduction_inc,
.offset_out = uvm_hal_hopper_ce_offset_out,
.offset_in_out = uvm_hal_hopper_ce_offset_in_out,
.memset_1 = uvm_hal_hopper_ce_memset_1,
.memset_4 = uvm_hal_hopper_ce_memset_4,
.memset_8 = uvm_hal_hopper_ce_memset_8,
},
},
}; };
// Table for GPFIFO functions. Same idea as the copy engine table. // Table for GPFIFO functions. Same idea as the copy engine table.
@ -171,6 +164,8 @@ static uvm_hal_class_ops_t host_table[] =
.semaphore_release = uvm_hal_maxwell_host_semaphore_release, .semaphore_release = uvm_hal_maxwell_host_semaphore_release,
.semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp, .semaphore_timestamp = uvm_hal_maxwell_host_semaphore_timestamp,
.set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry, .set_gpfifo_entry = uvm_hal_maxwell_host_set_gpfifo_entry,
.set_gpfifo_noop = uvm_hal_maxwell_host_set_gpfifo_noop,
.set_gpfifo_pushbuffer_segment_base = uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported,
.write_gpu_put = uvm_hal_maxwell_host_write_gpu_put, .write_gpu_put = uvm_hal_maxwell_host_write_gpu_put,
.tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f, .tlb_invalidate_all = uvm_hal_maxwell_host_tlb_invalidate_all_a16f,
.tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va, .tlb_invalidate_va = uvm_hal_maxwell_host_tlb_invalidate_va,
@ -249,23 +244,23 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test, .tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
} }
}, },
{
.id = HOPPER_CHANNEL_GPFIFO_A,
.parent_id = AMPERE_CHANNEL_GPFIFO_A,
.u.host_ops = {
.method_validate = uvm_hal_method_validate_stub,
.sw_method_validate = uvm_hal_method_validate_stub,
.semaphore_acquire = uvm_hal_hopper_host_semaphore_acquire,
.semaphore_release = uvm_hal_hopper_host_semaphore_release,
.semaphore_timestamp = uvm_hal_hopper_host_semaphore_timestamp,
.tlb_invalidate_all = uvm_hal_hopper_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_hopper_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_hopper_host_tlb_invalidate_test,
.cancel_faults_va = uvm_hal_hopper_cancel_faults_va,
.set_gpfifo_entry = uvm_hal_hopper_host_set_gpfifo_entry,
.set_gpfifo_pushbuffer_segment_base = uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base,
}
},
}; };
static uvm_hal_class_ops_t arch_table[] = static uvm_hal_class_ops_t arch_table[] =
@ -326,43 +321,23 @@ static uvm_hal_class_ops_t arch_table[] =
.mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id, .mmu_client_id_to_utlb_id = uvm_hal_ampere_mmu_client_id_to_utlb_id,
}, },
}, },
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.u.arch_ops = {
.init_properties = uvm_hal_ada_arch_init_properties,
},
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.u.arch_ops = {
.init_properties = uvm_hal_hopper_arch_init_properties,
.mmu_mode_hal = uvm_hal_mmu_mode_hopper,
.mmu_engine_id_to_type = uvm_hal_hopper_mmu_engine_id_to_type,
.mmu_client_id_to_utlb_id = uvm_hal_hopper_mmu_client_id_to_utlb_id,
},
},
}; };
static uvm_hal_class_ops_t fault_buffer_table[] = static uvm_hal_class_ops_t fault_buffer_table[] =
@ -430,33 +405,18 @@ static uvm_hal_class_ops_t fault_buffer_table[] =
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100, .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.fault_buffer_ops = {} .u.fault_buffer_ops = {}
}, },
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.u.fault_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.u.fault_buffer_ops = {
.get_ve_id = uvm_hal_hopper_fault_buffer_get_ve_id,
}
},
}; };
static uvm_hal_class_ops_t access_counter_buffer_table[] = static uvm_hal_class_ops_t access_counter_buffer_table[] =
@ -509,105 +469,18 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100, .parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.u.access_counter_buffer_ops = {} .u.access_counter_buffer_ops = {}
}, },
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GA100,
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_AD100,
.u.access_counter_buffer_ops = {}
},
}; };
static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id) static inline uvm_hal_class_ops_t *ops_find_by_id(uvm_hal_class_ops_t *table, NvU32 row_count, NvU32 id)
{ {
NvLength i; NvLength i;
@ -711,17 +584,6 @@ NV_STATUS uvm_hal_init_table(void)
return status; return status;
} }
return NV_OK; return NV_OK;
} }
@ -772,16 +634,6 @@ NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops; parent_gpu->access_counter_buffer_hal = &class_ops->u.access_counter_buffer_ops;
return NV_OK; return NV_OK;
} }


@ -1,5 +1,5 @@
/******************************************************************************* /*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation Copyright (c) 2015-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to of this software and associated documentation files (the "Software"), to
@ -41,10 +41,6 @@ void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push); void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push); void uvm_hal_pascal_host_init(uvm_push_t *push);
// Host method validation // Host method validation
typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data); typedef bool (*uvm_hal_host_method_validate)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data); bool uvm_hal_ampere_host_method_validate(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
@ -118,12 +114,10 @@ void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb, uvm_gpu_phys_address_t pdb,
NvU32 depth, NvU32 depth,
uvm_membar_t membar); uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar);
// Issue a TLB invalidate applying to the specified VA range in a PDB. // Issue a TLB invalidate applying to the specified VA range in a PDB.
// //
@ -183,15 +177,13 @@ void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
NvU64 size, NvU64 size,
NvU32 page_size, NvU32 page_size,
uvm_membar_t membar); uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar);
typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push, typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
uvm_gpu_phys_address_t pdb, uvm_gpu_phys_address_t pdb,
@ -205,11 +197,9 @@ void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push, void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb, uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params); UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params);
// By default all semaphore release operations include a membar sys before the // By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with // operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@ -217,18 +207,10 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Release a semaphore including a timestamp at the specific GPU VA. // Release a semaphore including a timestamp at the specific GPU VA.
// //
@ -238,31 +220,28 @@ void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va); typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va); void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va); void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va); void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va); void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length); typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length); void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length); void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length);
typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);
typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);
typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put); typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put); void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
@ -277,16 +256,12 @@ NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset); typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset); void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset); void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);
typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out); typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out); void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out); void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src); typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src); NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
@ -354,11 +329,9 @@ void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size); void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size); void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
// Increments the semaphore by 1, or resets to 0 if the incremented value would // Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload. // exceed the payload.
@ -369,9 +342,7 @@ void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value,
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload); void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
// Initialize GPU architecture dependent properties // Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu); typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
@ -380,12 +351,8 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu); void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu); void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu); void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
// Retrieve the page-tree HAL for a given big page size // Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size); typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
@ -396,9 +363,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size); uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size); uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size); uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu); void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu); void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu); void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
@ -412,18 +377,14 @@ uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mm
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id); uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id); uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id); uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);
typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id); typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id); NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id); NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id); NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id); NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);
// Replayable faults // Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu); typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
@ -477,9 +438,7 @@ void uvm_hal_volta_fault_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *buffer_entry); uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu); void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get); void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index); bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index); void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
@ -529,12 +488,10 @@ void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
const uvm_fault_buffer_entry_t *fault_entry, const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode); uvm_fault_cancel_va_mode_t cancel_va_mode);
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode);
typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push, typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
uvm_user_channel_t *user_channel, uvm_user_channel_t *user_channel,
@ -619,39 +576,6 @@ void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu); void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get); void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
struct uvm_host_hal_struct struct uvm_host_hal_struct
{ {
uvm_hal_init_t init; uvm_hal_init_t init;
@ -666,6 +590,8 @@ struct uvm_host_hal_struct
uvm_hal_semaphore_acquire_t semaphore_acquire; uvm_hal_semaphore_acquire_t semaphore_acquire;
uvm_hal_semaphore_timestamp_t semaphore_timestamp; uvm_hal_semaphore_timestamp_t semaphore_timestamp;
uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry; uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
uvm_hal_host_write_gpu_put_t write_gpu_put; uvm_hal_host_write_gpu_put_t write_gpu_put;
uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all; uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va; uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
@ -742,17 +668,6 @@ struct uvm_access_counter_buffer_hal_struct
uvm_hal_access_counter_buffer_entry_size_t entry_size; uvm_hal_access_counter_buffer_entry_size_t entry_size;
}; };
typedef struct typedef struct
{ {
// id is either a hardware class or GPU architecture // id is either a hardware class or GPU architecture
@ -775,10 +690,6 @@ typedef struct
// access_counter_buffer_ops: id is an architecture // access_counter_buffer_ops: id is an architecture
uvm_access_counter_buffer_hal_t access_counter_buffer_ops; uvm_access_counter_buffer_hal_t access_counter_buffer_ops;
} u; } u;
} uvm_hal_class_ops_t; } uvm_hal_class_ops_t;


@@ -31,7 +31,6 @@ MODULE_PARM_DESC(uvm_disable_hmm,
"HMM is not supported in the driver, or if ATS settings "
"conflict with HMM.");
#if UVM_IS_CONFIG_HMM()
#include <linux/hmm.h>
@@ -785,6 +784,60 @@ NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
return NV_OK;
}
static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
uvm_va_block_context_t *block_context)
{
uvm_va_policy_t *policy;
uvm_va_policy_node_t *node;
uvm_va_block_region_t region;
NV_STATUS status = NV_OK;
uvm_mutex_lock(&va_block->lock);
uvm_for_each_va_policy_in(policy, va_block, va_block->start, va_block->end, node, region) {
block_context->policy = policy;
// Unmap may split PTEs and require a retry. Needs to be called
// before the pinned pages information is destroyed.
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
NULL,
unmap_remote_pinned_pages_from_all_processors(va_block,
block_context,
region));
uvm_perf_thrashing_info_destroy(va_block);
if (status != NV_OK)
break;
}
uvm_mutex_unlock(&va_block->lock);
return status;
}
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
{
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
uvm_range_tree_node_t *node, *next;
uvm_va_block_t *va_block;
NV_STATUS status = NV_OK;
if (!uvm_hmm_is_enabled(va_space))
return NV_OK;
uvm_assert_rwsem_locked_write(&va_space->lock);
uvm_range_tree_for_each_safe(node, next, &va_space->hmm.blocks) {
va_block = hmm_va_block_from_node(node);
status = hmm_clear_thrashing_policy(va_block, block_context);
if (status != NV_OK)
break;
}
return status;
}
#endif // UVM_IS_CONFIG_HMM()


@@ -185,6 +185,10 @@ typedef struct
uvm_page_index_t page_index,
uvm_page_index_t *outerp);
// Clear thrashing policy information from all HMM va_blocks.
// Locking: va_space lock must be held in write mode.
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);
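// Hypothetical caller sketch, assuming the driver's usual rwsem wrappers for the
// va_space lock (the wrappers themselves are not part of this change):
//
//     uvm_down_write(&va_space->lock);
//     status = uvm_hmm_clear_thrashing_policy(va_space);
//     uvm_up_write(&va_space->lock);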
#else // UVM_IS_CONFIG_HMM()
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
@@ -282,6 +286,11 @@ typedef struct
return NV_OK;
}
static NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
{
return NV_OK;
}
#endif // UVM_IS_CONFIG_HMM()
#endif // _UVM_HMM_H_


@@ -0,0 +1,96 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_gpu.h"
#include "uvm_mem.h"
#include "uvm_hopper_fault_buffer.h"
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
{
parent_gpu->tlb_batch.va_invalidate_supported = true;
parent_gpu->tlb_batch.va_range_invalidate_supported = true;
// TODO: Bug 1767241: Run benchmarks to figure out a good number
parent_gpu->tlb_batch.max_ranges = 8;
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Hopper covers 64 PB and that's the minimum
// size that can be used.
parent_gpu->rm_va_base = 0;
parent_gpu->rm_va_size = 64ull * 1024 * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_base = parent_gpu->rm_va_size + 384ull * 1024 * 1024 * 1024 * 1024;
parent_gpu->uvm_mem_va_size = UVM_MEM_VA_SIZE;
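// Spelled out: 64ull * 1024^5 bytes is 2^56 bytes (64 PiB), so RM owns VA [0, 64 PiB) and
// UVM's internal mappings start at 64 PiB + 384 TiB, still well inside the 57-bit
// (128 PiB) VA limits set just below.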
parent_gpu->peer_copy_mode = g_uvm_global.peer_copy_mode;
// All GR context buffers may be mapped to 57b wide VAs. All "compute" units
// accessing GR context buffers support the 57-bit VA range.
parent_gpu->max_channel_va = 1ull << 57;
parent_gpu->max_host_va = 1ull << 57;
// Hopper can map sysmem with any page size
parent_gpu->can_map_sysmem_with_large_pages = true;
// Prefetch instructions will generate faults
parent_gpu->prefetch_fault_supported = true;
// Hopper can place GPFIFO in vidmem
parent_gpu->gpfifo_in_vidmem_supported = true;
parent_gpu->replayable_faults_supported = true;
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;
parent_gpu->has_clear_faulted_channel_sw_method = true;
parent_gpu->has_clear_faulted_channel_method = false;
parent_gpu->smc.supported = true;
parent_gpu->sparse_mappings_supported = true;
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
}


@@ -0,0 +1,339 @@
/*******************************************************************************
Copyright (c) 2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_push.h"
#include "clc8b5.h"
static void hopper_membar_after_transfer(uvm_push_t *push)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
return;
// TODO: [UVM-Volta] Remove Host WFI + Membar WAR for CE flush-only bug
// http://nvbugs/1734761
gpu->parent->host_hal->wait_for_idle(push);
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
gpu->parent->host_hal->membar_gpu(push);
else
gpu->parent->host_hal->membar_sys(push);
}
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
BUILD_BUG_ON(HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
HWCONST(C8B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));
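// The BUILD_BUG_ONs above guarantee the SRC and DST encodings match, so this helper can
// return SET_SRC_PHYS_MODE values and callers may reuse them for SET_DST_PHYS_MODE
// (as hopper_memset_push_phys_mode() does below).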
if (aperture == UVM_APERTURE_SYS) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
}
else if (aperture == UVM_APERTURE_VID) {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
}
else {
return HWCONST(C8B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, FLA, 0) |
HWVALUE(C8B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
}
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset_out)
{
NV_PUSH_2U(C8B5, OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out)
{
NV_PUSH_4U(C8B5, OFFSET_IN_UPPER, HWVALUE(C8B5, OFFSET_IN_UPPER, UPPER, NvOffset_HI32(offset_in)),
OFFSET_IN_LOWER, HWVALUE(C8B5, OFFSET_IN_LOWER, VALUE, NvOffset_LO32(offset_in)),
OFFSET_OUT_UPPER, HWVALUE(C8B5, OFFSET_OUT_UPPER, UPPER, NvOffset_HI32(offset_out)),
OFFSET_OUT_LOWER, HWVALUE(C8B5, OFFSET_OUT_LOWER, VALUE, NvOffset_LO32(offset_out)));
}
// Perform an appropriate membar before a semaphore operation. Returns whether
// the semaphore operation should include a flush.
static bool hopper_membar_before_semaphore(uvm_push_t *push)
{
uvm_gpu_t *gpu;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE)) {
// No MEMBAR requested, don't use a flush.
return false;
}
if (!uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU)) {
// By default do a MEMBAR SYS and for that we can just use flush on the
// semaphore operation.
return true;
}
// TODO: Bug 1734761: Remove the HOST WFI+membar WAR, i.e, perform the CE
// flush when MEMBAR GPU is requested.
gpu = uvm_push_get_gpu(push);
gpu->parent->host_hal->wait_for_idle(push);
gpu->parent->host_hal->membar_gpu(push);
return false;
}
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;
use_flush = hopper_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
launch_dma_plc_mode);
}
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;
use_flush = hopper_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, payload);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_ONE_WORD_SEMAPHORE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION, INC) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_SIGN, UNSIGNED) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_REDUCTION_ENABLE, TRUE) |
launch_dma_plc_mode);
}
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
uvm_gpu_t *gpu;
NvU32 flush_value;
NvU32 launch_dma_plc_mode;
bool use_flush;
use_flush = hopper_membar_before_semaphore(push);
if (use_flush)
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, TRUE);
else
flush_value = HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE);
NV_PUSH_3U(C8B5, SET_SEMAPHORE_A, HWVALUE(C8B5, SET_SEMAPHORE_A, UPPER, NvOffset_HI32(gpu_va)),
SET_SEMAPHORE_B, HWVALUE(C8B5, SET_SEMAPHORE_B, LOWER, NvOffset_LO32(gpu_va)),
SET_SEMAPHORE_PAYLOAD, 0xdeadbeef);
gpu = uvm_push_get_gpu(push);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
NV_PUSH_1U(C8B5, LAUNCH_DMA, flush_value |
HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NONE) |
HWCONST(C8B5, LAUNCH_DMA, SEMAPHORE_TYPE, RELEASE_FOUR_WORD_SEMAPHORE) |
launch_dma_plc_mode);
}
static NvU32 hopper_memset_push_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst)
{
if (dst.is_virtual)
return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
NV_PUSH_1U(C8B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
return HWCONST(C8B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
}
static bool hopper_scrub_enable(uvm_gpu_address_t dst, size_t size)
{
return !dst.is_virtual &&
dst.aperture == UVM_APERTURE_VID &&
IS_ALIGNED(dst.address, UVM_PAGE_SIZE_4K) &&
IS_ALIGNED(size, UVM_PAGE_SIZE_4K);
}
static void hopper_memset_common(uvm_push_t *push,
uvm_gpu_address_t dst,
size_t num_elements,
size_t memset_element_size)
{
// If >4GB memsets ever become an important use case, this function should
// use multi-line transfers so we don't have to iterate (bug 1766588).
static const size_t max_single_memset = 0xFFFFFFFF;
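// Each LINE_LENGTH_IN transfer below is capped at 0xFFFFFFFF elements, so, for example,
// a 6 GiB memset of 1-byte elements is emitted as one 0xFFFFFFFF-element transfer
// followed by a second transfer of the remaining 0x80000001 elements.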
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
NvU32 pipelined_value;
NvU32 launch_dma_dst_type;
NvU32 launch_dma_plc_mode;
NvU32 launch_dma_remap_enable;
NvU32 launch_dma_scrub_enable;
UVM_ASSERT_MSG(gpu->parent->ce_hal->memset_validate(push, dst, memset_element_size),
"Memset validation failed in channel %s, GPU %s",
push->channel->name,
uvm_gpu_name(gpu));
launch_dma_dst_type = hopper_memset_push_phys_mode(push, dst);
launch_dma_plc_mode = gpu->parent->ce_hal->plc_mode();
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED))
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, PIPELINED);
else
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
if (memset_element_size == 8 && hopper_scrub_enable(dst, num_elements * memset_element_size)) {
launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, FALSE);
launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, TRUE);
NV_PUSH_1U(C8B5, SET_MEMORY_SCRUB_PARAMETERS,
HWCONST(C8B5, SET_MEMORY_SCRUB_PARAMETERS, DISCARDABLE, FALSE));
// Scrub requires disabling remap, and with remap disabled the element
// size is 1.
num_elements *= memset_element_size;
memset_element_size = 1;
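// For example, a 4 KiB scrubbed memset_8 becomes 4096 1-byte elements instead of
// 512 8-byte elements.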
}
else {
launch_dma_remap_enable = HWCONST(C8B5, LAUNCH_DMA, REMAP_ENABLE, TRUE);
launch_dma_scrub_enable = HWCONST(C8B5, LAUNCH_DMA, MEMORY_SCRUB_ENABLE, FALSE);
}
do {
NvU32 memset_this_time = (NvU32)min(num_elements, max_single_memset);
gpu->parent->ce_hal->offset_out(push, dst.address);
NV_PUSH_1U(C8B5, LINE_LENGTH_IN, memset_this_time);
NV_PUSH_1U(C8B5, LAUNCH_DMA,
HWCONST(C8B5, LAUNCH_DMA, SRC_MEMORY_LAYOUT, PITCH) |
HWCONST(C8B5, LAUNCH_DMA, DST_MEMORY_LAYOUT, PITCH) |
HWCONST(C8B5, LAUNCH_DMA, MULTI_LINE_ENABLE, FALSE) |
HWCONST(C8B5, LAUNCH_DMA, FLUSH_ENABLE, FALSE) |
launch_dma_remap_enable |
launch_dma_scrub_enable |
launch_dma_dst_type |
launch_dma_plc_mode |
pipelined_value);
dst.address += memset_this_time * memset_element_size;
num_elements -= memset_this_time;
pipelined_value = HWCONST(C8B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED);
} while (num_elements > 0);
hopper_membar_after_transfer(push);
}
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size)
{
UVM_ASSERT_MSG(size % 8 == 0, "size: %zd\n", size);
size /= 8;
NV_PUSH_3U(C8B5, SET_REMAP_CONST_A, (NvU32)value,
SET_REMAP_CONST_B, (NvU32)(value >> 32),
SET_REMAP_COMPONENTS,
HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_A) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_Y, CONST_B) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, TWO));
hopper_memset_common(push, dst, size, 8);
}
void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size)
{
if (hopper_scrub_enable(dst, size)) {
NvU64 value64 = value;
value64 |= value64 << 8;
value64 |= value64 << 16;
value64 |= value64 << 32;
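// value64 now holds the byte replicated across all eight lanes, e.g. 0xAB becomes
// 0xABABABABABABABAB.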
uvm_hal_hopper_ce_memset_8(push, dst, value64, size);
return;
}
NV_PUSH_2U(C8B5, SET_REMAP_CONST_B, (NvU32)value,
SET_REMAP_COMPONENTS,
HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, ONE) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
hopper_memset_common(push, dst, size, 1);
}
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size)
{
UVM_ASSERT_MSG(size % 4 == 0, "size: %zd\n", size);
if (hopper_scrub_enable(dst, size)) {
NvU64 value64 = value;
value64 |= value64 << 32;
uvm_hal_hopper_ce_memset_8(push, dst, value64, size);
return;
}
size /= 4;
NV_PUSH_2U(C8B5, SET_REMAP_CONST_B, value,
SET_REMAP_COMPONENTS,
HWCONST(C8B5, SET_REMAP_COMPONENTS, DST_X, CONST_B) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, COMPONENT_SIZE, FOUR) |
HWCONST(C8B5, SET_REMAP_COMPONENTS, NUM_DST_COMPONENTS, ONE));
hopper_memset_common(push, dst, size, 4);
}


@@ -0,0 +1,42 @@
/*******************************************************************************
Copyright (c) 2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal_types.h"
#include "hwref/hopper/gh100/dev_fault.h"
// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
// ... to validate the ve_id
#define MAX_SUBCONTEXTS 64
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
{
// Only graphics engines can generate MMU faults from different subcontexts
if (mmu_engine_type == UVM_MMU_ENGINE_TYPE_GRAPHICS) {
NvU16 ve_id = mmu_engine_id - NV_PFAULT_MMU_ENG_ID_GRAPHICS;
UVM_ASSERT(ve_id < MAX_SUBCONTEXTS);
return (NvU8)ve_id;
}
else {
return 0;
}
}
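// Illustrative example (the NV_PFAULT_MMU_ENG_ID_GRAPHICS value lives in dev_fault.h and
// is not shown here): a graphics fault reporting mmu_engine_id == NV_PFAULT_MMU_ENG_ID_GRAPHICS + 3
// resolves to ve_id 3, i.e. subcontext 3.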


@@ -0,0 +1,90 @@
/*******************************************************************************
Copyright (c) 2020 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_HAL_HOPPER_FAULT_BUFFER_H__
#define __UVM_HAL_HOPPER_FAULT_BUFFER_H__
#include "nvtypes.h"
#include "uvm_common.h"
#include "uvm_gpu.h"
// There are up to 9 TPCs per GPC in Hopper, and there are 2 LTP uTLBs per TPC.
// Besides, there is one active RGG uTLB per GPC. Each TPC has a number of
// clients that can make requests to its uTLBs: 1xTPCCS, 1xPE, 2xT1. Requests
// from these units are routed as follows to the 2 LTP uTLBs:
//
// -------- ---------
// | T1_0 | -----------------> | uTLB0 |
// -------- ---------
//
// -------- ---------
// | T1_1 | -----------------> | uTLB1 |
// -------- --------> ---------
// | ^
// ------- | |
// | PE | ----------- |
// ------- |
// |
// --------- |
// | TPCCS | -----------------------
// ---------
//
//
// The client ids are local to their GPC and the id mapping is linear across
// TPCs: TPC_n has TPCCS_n, PE_n, T1_p, and T1_q, where p=2*n and q=p+1.
//
// NV_PFAULT_CLIENT_GPC_LTP_UTLB_n and NV_PFAULT_CLIENT_GPC_RGG_UTLB enums can
// be ignored. These will never be reported in a fault message, and should
// never be used in an invalidate. Therefore, we define our own values.
typedef enum {
UVM_HOPPER_GPC_UTLB_ID_RGG = 0,
UVM_HOPPER_GPC_UTLB_ID_LTP0 = 1,
UVM_HOPPER_GPC_UTLB_ID_LTP1 = 2,
UVM_HOPPER_GPC_UTLB_ID_LTP2 = 3,
UVM_HOPPER_GPC_UTLB_ID_LTP3 = 4,
UVM_HOPPER_GPC_UTLB_ID_LTP4 = 5,
UVM_HOPPER_GPC_UTLB_ID_LTP5 = 6,
UVM_HOPPER_GPC_UTLB_ID_LTP6 = 7,
UVM_HOPPER_GPC_UTLB_ID_LTP7 = 8,
UVM_HOPPER_GPC_UTLB_ID_LTP8 = 9,
UVM_HOPPER_GPC_UTLB_ID_LTP9 = 10,
UVM_HOPPER_GPC_UTLB_ID_LTP10 = 11,
UVM_HOPPER_GPC_UTLB_ID_LTP11 = 12,
UVM_HOPPER_GPC_UTLB_ID_LTP12 = 13,
UVM_HOPPER_GPC_UTLB_ID_LTP13 = 14,
UVM_HOPPER_GPC_UTLB_ID_LTP14 = 15,
UVM_HOPPER_GPC_UTLB_ID_LTP15 = 16,
UVM_HOPPER_GPC_UTLB_ID_LTP16 = 17,
UVM_HOPPER_GPC_UTLB_ID_LTP17 = 18,
UVM_HOPPER_GPC_UTLB_COUNT,
} uvm_hopper_gpc_utlb_id_t;
static NvU32 uvm_hopper_get_utlbs_per_gpc(uvm_parent_gpu_t *parent_gpu)
{
NvU32 utlbs = parent_gpu->rm_info.maxTpcPerGpcCount * 2 + 1;
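// Two LTP uTLBs per TPC plus the GPC's single RGG uTLB; at the 9-TPC maximum described
// above this is 9 * 2 + 1 = 19, which matches UVM_HOPPER_GPC_UTLB_COUNT.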
UVM_ASSERT(utlbs <= UVM_HOPPER_GPC_UTLB_COUNT);
return utlbs;
}
#endif


@@ -0,0 +1,412 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_hal.h"
#include "uvm_push.h"
#include "uvm_push_macros.h"
#include "clc86f.h"
// TODO: Bug 3210931: Rename HOST references and files to ESCHED.
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
SEM_PAYLOAD_LO, payload,
SEM_PAYLOAD_HI, 0,
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, RELEASE) |
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
HWCONST(C86F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS) |
HWCONST(C86F, SEM_EXECUTE, RELEASE_WFI, DIS));
}
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
SEM_PAYLOAD_LO, payload,
SEM_PAYLOAD_HI, 0,
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, ACQ_CIRC_GEQ) |
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
HWCONST(C86F, SEM_EXECUTE, ACQUIRE_SWITCH_TSG, EN));
}
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va)
{
NvU32 sem_lo;
UVM_ASSERT(!(NvOffset_LO32(gpu_va) & ~HWSHIFTMASK(C86F, SEM_ADDR_LO, OFFSET)));
sem_lo = READ_HWVALUE(NvOffset_LO32(gpu_va), C86F, SEM_ADDR_LO, OFFSET);
uvm_hal_wfi_membar(push, uvm_push_get_and_reset_membar_flag(push));
NV_PUSH_5U(C86F, SEM_ADDR_LO, HWVALUE(C86F, SEM_ADDR_LO, OFFSET, sem_lo),
SEM_ADDR_HI, HWVALUE(C86F, SEM_ADDR_HI, OFFSET, NvOffset_HI32(gpu_va)),
SEM_PAYLOAD_LO, 0xdeadbeef,
SEM_PAYLOAD_HI, 0,
SEM_EXECUTE, HWCONST(C86F, SEM_EXECUTE, OPERATION, RELEASE) |
HWCONST(C86F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
HWCONST(C86F, SEM_EXECUTE, RELEASE_TIMESTAMP, EN) |
HWCONST(C86F, SEM_EXECUTE, RELEASE_WFI, DIS));
}
static NvU32 fault_cancel_va_mode_to_cancel_access_type(uvm_fault_cancel_va_mode_t cancel_va_mode)
{
// There are only two logical cases from the perspective of UVM: accesses to
// an invalid address, which cancel all accesses on the page, and accesses
// with an invalid type on a read-only page, which cancel all write/atomic
// accesses on the page.
switch (cancel_va_mode)
{
case UVM_FAULT_CANCEL_VA_MODE_ALL:
return HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACCESS_TYPE, VIRT_ALL);
case UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC:
return HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACCESS_TYPE, VIRT_WRITE_AND_ATOMIC);
default:
UVM_ASSERT_MSG(false, "Invalid cancel_va_mode %d\n", cancel_va_mode);
}
return 0;
}
void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
const uvm_fault_buffer_entry_t *fault_entry,
uvm_fault_cancel_va_mode_t cancel_va_mode)
{
NvU32 aperture_value;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 addr_lo;
NvU32 addr_hi;
NvU32 access_type_value;
NvU64 addr = fault_entry->fault_address;
NvU32 mmu_engine_id = fault_entry->fault_source.mmu_engine_id;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx not aligned to 4KB\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
access_type_value = fault_cancel_va_mode_to_cancel_access_type(cancel_va_mode);
UVM_ASSERT_MSG(IS_ALIGNED(addr, 1 << 12), "addr 0x%llx not aligned to 4KB\n", addr);
addr >>= 12;
addr_lo = addr & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
addr_hi = addr >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, addr_lo) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID, mmu_engine_id),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, addr_hi),
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, CANCEL_VA_GLOBAL) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, NONE) |
access_type_value |
aperture_value,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE4 is the highest level on Hopper, see the comment in uvm_hopper_mmu.c
// for details.
UVM_ASSERT_MSG(depth < NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
page_table_level = NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
NvU32 aperture_value;
NvU32 page_table_level;
NvU32 pdb_lo;
NvU32 pdb_hi;
NvU32 ack_value = 0;
NvU32 va_lo;
NvU32 va_hi;
NvU64 end;
NvU64 actual_base;
NvU64 actual_size;
NvU64 actual_end;
NvU32 log2_invalidation_size;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
UVM_ASSERT_MSG(IS_ALIGNED(page_size, 1 << 12), "page_size 0x%x\n", page_size);
UVM_ASSERT_MSG(IS_ALIGNED(base, page_size), "base 0x%llx page_size 0x%x\n", base, page_size);
UVM_ASSERT_MSG(IS_ALIGNED(size, page_size), "size 0x%llx page_size 0x%x\n", size, page_size);
UVM_ASSERT_MSG(size > 0, "size 0x%llx\n", size);
// The invalidation size must be a power-of-two number of pages containing
// the passed interval
end = base + size - 1;
log2_invalidation_size = __fls((unsigned long)(end ^ base)) + 1;
if (log2_invalidation_size == 64) {
// Invalidate everything
gpu->parent->host_hal->tlb_invalidate_all(push, pdb, depth, membar);
return;
}
// The hardware aligns the target address down to the invalidation size.
actual_size = 1ULL << log2_invalidation_size;
actual_base = UVM_ALIGN_DOWN(base, actual_size);
actual_end = actual_base + actual_size - 1;
UVM_ASSERT(actual_end >= end);
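// Worked example (illustrative numbers): for base = 0x200000 and size = 0x3000,
// end = 0x202fff and (end ^ base) = 0x2fff, so __fls() returns 13 and
// log2_invalidation_size becomes 14, i.e. a 16KB invalidation. After the
// hardware alignment described above, actual_base = 0x200000 and
// actual_end = 0x203fff, which still covers the requested [base, end] interval.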
// The invalidation size field expects log2(invalidation size in 4K), not
// log2(invalidation size in bytes)
log2_invalidation_size -= 12;
// Address to invalidate, as a multiple of 4K.
base >>= 12;
va_lo = base & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
va_hi = base >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
// PDE4 is the highest level on Hopper, see the comment in uvm_hopper_mmu.c
// for details.
UVM_ASSERT_MSG(depth < NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4, "depth %u", depth);
page_table_level = NVC86F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 - depth;
if (membar != UVM_MEMBAR_NONE) {
// If a GPU or SYS membar is needed, ACK_TYPE needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
NV_PUSH_4U(C86F, MEM_OP_A, HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_INVALIDATION_SIZE, log2_invalidation_size) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
uvm_hal_tlb_invalidate_membar(push, membar);
}
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
UVM_TEST_INVALIDATE_TLB_PARAMS *params)
{
NvU32 ack_value = 0;
NvU32 invalidate_gpc_value = 0;
NvU32 aperture_value = 0;
NvU32 pdb_lo = 0;
NvU32 pdb_hi = 0;
NvU32 page_table_level = 0;
uvm_membar_t membar;
UVM_ASSERT_MSG(pdb.aperture == UVM_APERTURE_VID || pdb.aperture == UVM_APERTURE_SYS, "aperture: %u", pdb.aperture);
if (pdb.aperture == UVM_APERTURE_VID)
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, VID_MEM);
else
aperture_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_APERTURE, SYS_MEM_COHERENT);
UVM_ASSERT_MSG(IS_ALIGNED(pdb.address, 1 << 12), "pdb 0x%llx\n", pdb.address);
pdb.address >>= 12;
pdb_lo = pdb.address & HWMASK(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
pdb_hi = pdb.address >> HWSIZE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO);
if (params->page_table_level != UvmInvalidatePageTableLevelAll) {
// PDE4 is the highest level on Hopper, see the comment in
// uvm_hopper_mmu.c for details.
page_table_level = min((NvU32)UvmInvalidatePageTableLevelPde4, params->page_table_level) - 1;
}
if (params->membar != UvmInvalidateTlbMemBarNone) {
// If a GPU or SYS membar is needed, ack_value needs to be set to
// GLOBALLY to make sure all the pending accesses can be picked up by
// the membar.
ack_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_ACK_TYPE, GLOBALLY);
}
if (params->disable_gpc_invalidate)
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, DISABLE);
else
invalidate_gpc_value = HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_GPC, ENABLE);
if (params->target_va_mode == UvmTargetVaModeTargeted) {
NvU64 va = params->va >> 12;
NvU32 va_lo = va & HWMASK(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NvU32 va_hi = va >> HWSIZE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO);
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS) |
HWVALUE(C86F, MEM_OP_A, TLB_INVALIDATE_TARGET_ADDR_LO, va_lo),
MEM_OP_B, HWVALUE(C86F, MEM_OP_B, TLB_INVALIDATE_TARGET_ADDR_HI, va_hi),
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE_TARGETED) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
else {
NV_PUSH_4U(C86F, MEM_OP_A, HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_SYSMEMBAR, DIS) |
HWCONST(C86F, MEM_OP_A, TLB_INVALIDATE_INVAL_SCOPE, NON_LINK_TLBS),
MEM_OP_B, 0,
MEM_OP_C, HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_REPLAY, NONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PAGE_TABLE_LEVEL, page_table_level) |
HWCONST(C86F, MEM_OP_C, TLB_INVALIDATE_PDB, ONE) |
HWVALUE(C86F, MEM_OP_C, TLB_INVALIDATE_PDB_ADDR_LO, pdb_lo) |
invalidate_gpc_value |
aperture_value |
ack_value,
MEM_OP_D, HWCONST(C86F, MEM_OP_D, OPERATION, MMU_TLB_INVALIDATE) |
HWVALUE(C86F, MEM_OP_D, TLB_INVALIDATE_PDB_ADDR_HI, pdb_hi));
}
if (params->membar == UvmInvalidateTlbMemBarSys)
membar = UVM_MEMBAR_SYS;
else if (params->membar == UvmInvalidateTlbMemBarLocal)
membar = UVM_MEMBAR_GPU;
else
membar = UVM_MEMBAR_NONE;
uvm_hal_tlb_invalidate_membar(push, membar);
}
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va)
{
UVM_ASSERT(!uvm_global_is_suspended());
UVM_ASSERT((pushbuffer_va >> 40) == ((NvU64)(pushbuffer_va + UVM_PUSHBUFFER_SIZE) >> 40));
*fifo_entry = HWVALUE(C86F, GP_ENTRY0, PB_EXTENDED_BASE_OPERAND, (pushbuffer_va >> 40));
*fifo_entry |= (NvU64)(HWCONST(C86F, GP_ENTRY1, OPCODE, SET_PB_SEGMENT_EXTENDED_BASE)) << 32;
}
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_va, NvU32 pushbuffer_length)
{
NvU64 fifo_entry_value;
NvU64 pb_low_bits_mask = (1ull << 40) - 1;
UVM_ASSERT(!uvm_global_is_suspended());
UVM_ASSERT_MSG(IS_ALIGNED(pushbuffer_va, 4), "pushbuffer va unaligned: %llu\n", pushbuffer_va);
UVM_ASSERT_MSG(IS_ALIGNED(pushbuffer_length, 4), "pushbuffer length unaligned: %u\n", pushbuffer_length);
// The PBDMA/Esched fetches the lower 40-bits of a pushbuffer VA from the
// GPFIFO entry. The VA upper bits are stored internally by Esched and set
// by uvm_channel_write_ctrl_gpfifo()/set_gpfifo_pushbuffer_segment_base().
pushbuffer_va &= pb_low_bits_mask;
fifo_entry_value = HWVALUE(C86F, GP_ENTRY0, GET, NvU64_LO32(pushbuffer_va) >> 2);
fifo_entry_value |= (NvU64)(HWVALUE(C86F, GP_ENTRY1, GET_HI, NvU64_HI32(pushbuffer_va)) |
HWVALUE(C86F, GP_ENTRY1, LENGTH, pushbuffer_length >> 2)) << 32;
*fifo_entry = fifo_entry_value;
}
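// Example of how the two entry types above combine (illustrative values): for a
// pushbuffer at VA 0x10000001000 (bit 40 set), the control entry written by
// set_gpfifo_pushbuffer_segment_base() carries the upper bits (0x1 here), while
// the regular entry written by set_gpfifo_entry() encodes only the low 40 bits
// (0x1000) plus the method length, as enforced by pb_low_bits_mask above.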

View File

@ -0,0 +1,501 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
// For Hopper, the UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
// 0 PDE4 56:56
// 1 PDE3 55:47
// 2 PDE2 46:38
// 3 PDE1 (or 512M PTE) 37:29
// 4 PDE0 (dual 64k/4k PDE, or 2M PTE) 28:21
// 5 PTE_64K / PTE_4K 20:16 / 20:12
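// Put differently (illustrative walk-through of the table above): bit 56 of a
// 57-bit VA selects one of the two PDE4 entries, bits 55:47 index PDE3, 46:38
// index PDE2, 37:29 index PDE1, 28:21 index PDE0, and the remaining low bits
// select a 64K PTE (20:16) or a 4K PTE (20:12) plus the page offset. These
// per-level widths match index_bits_hopper() below: {1, 9, 9, 9, 8}, with 5 or
// 9 bits at the leaf.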
#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "uvm_hopper_fault_buffer.h"
#include "hwref/hopper/gh100/dev_fault.h"
#include "hwref/hopper/gh100/dev_mmu.h"
#define MMU_BIG 0
#define MMU_SMALL 1
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
return UVM_MMU_ENGINE_TYPE_HOST;
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_CE0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_CE9)
return UVM_MMU_ENGINE_TYPE_CE;
// We shouldn't be servicing faults from any other engines
UVM_ASSERT_MSG(mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_GRAPHICS, "Unexpected engine ID: 0x%x\n", mmu_engine_id);
return UVM_MMU_ENGINE_TYPE_GRAPHICS;
}
static NvU32 page_table_depth_hopper(NvU32 page_size)
{
// The common case is page_size == UVM_PAGE_SIZE_2M, hence the first check
if (page_size == UVM_PAGE_SIZE_2M)
return 4;
else if (page_size == UVM_PAGE_SIZE_512M)
return 3;
return 5;
}
static NvU32 entries_per_index_hopper(NvU32 depth)
{
UVM_ASSERT(depth < 6);
if (depth == 4)
return 2;
return 1;
}
static NvLength entry_offset_hopper(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 6);
if ((page_size == UVM_PAGE_SIZE_4K) && (depth == 4))
return MMU_SMALL;
return MMU_BIG;
}
static NvLength entry_size_hopper(NvU32 depth)
{
return entries_per_index_hopper(depth) * 8;
}
static NvU32 index_bits_hopper(NvU32 depth, NvU32 page_size)
{
static const NvU32 bit_widths[] = {1, 9, 9, 9, 8};
// Some code paths keep querying this until they get a 0, meaning only
// the page offset remains.
UVM_ASSERT(depth < 6);
if (depth < 5) {
return bit_widths[depth];
}
else if (depth == 5) {
switch (page_size) {
case UVM_PAGE_SIZE_4K:
return 9;
case UVM_PAGE_SIZE_64K:
return 5;
default:
break;
}
}
return 0;
}
static NvU32 num_va_bits_hopper(void)
{
return 57;
}
static NvLength allocation_size_hopper(NvU32 depth, NvU32 page_size)
{
UVM_ASSERT(depth < 6);
if (depth == 5 && page_size == UVM_PAGE_SIZE_64K)
return 256;
// depth 0 requires only a 16-byte allocation, but it must be 4k aligned.
return 4096;
}
// PTE Permission Control Flags
static NvU64 pte_pcf(uvm_prot_t prot, NvU64 flags)
{
bool ac = !(flags & UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED);
bool cached = flags & UVM_MMU_PTE_FLAGS_CACHED;
UVM_ASSERT(prot != UVM_PROT_NONE);
UVM_ASSERT((flags & ~UVM_MMU_PTE_FLAGS_MASK) == 0);
if (ac) {
switch (prot) {
case UVM_PROT_READ_ONLY:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACE :
NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACE;
case UVM_PROT_READ_WRITE:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACE :
NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACE;
case UVM_PROT_READ_WRITE_ATOMIC:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACE :
NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACE;
default:
break;
}
}
else {
switch (prot) {
case UVM_PROT_READ_ONLY:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_CACHED_ACD :
NV_MMU_VER3_PTE_PCF_REGULAR_RO_NO_ATOMIC_UNCACHED_ACD;
case UVM_PROT_READ_WRITE:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_CACHED_ACD :
NV_MMU_VER3_PTE_PCF_REGULAR_RW_NO_ATOMIC_UNCACHED_ACD;
case UVM_PROT_READ_WRITE_ATOMIC:
return cached ? NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_CACHED_ACD :
NV_MMU_VER3_PTE_PCF_REGULAR_RW_ATOMIC_UNCACHED_ACD;
default:
break;
}
}
// Unsupported PCF
UVM_ASSERT_MSG(0, "Unsupported PTE PCF: prot: %s, ac: %d, cached: %d\n", uvm_prot_string(prot), ac, cached);
return NV_MMU_VER3_PTE_PCF_INVALID;
}
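// Note on the naming used above (inferred from the ac/cached selectors in this
// function): the _ACE/_ACD suffix pairs differ only in whether access counters
// are enabled or disabled, and _CACHED/_UNCACHED in whether the data may be
// cached in the GPU L2. pte_pcf() thus maps the (prot, flags) pair onto one of
// twelve regular PCF encodings, or NV_MMU_VER3_PTE_PCF_INVALID when the
// combination is unsupported.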
static NvU64 make_pte_hopper(uvm_aperture_t aperture, NvU64 address, uvm_prot_t prot, NvU64 flags)
{
NvU8 aperture_bits = 0;
NvU64 pte_bits = 0;
// valid 0:0
pte_bits |= HWCONST64(_MMU_VER3, PTE, VALID, TRUE);
// aperture 2:1
if (aperture == UVM_APERTURE_SYS)
aperture_bits = NV_MMU_VER3_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
else if (aperture == UVM_APERTURE_VID)
aperture_bits = NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY;
else if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
aperture_bits = NV_MMU_VER3_PTE_APERTURE_PEER_MEMORY;
else
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", aperture);
pte_bits |= HWVALUE64(_MMU_VER3, PTE, APERTURE, aperture_bits);
// PCF (permission control flags) 7:3
pte_bits |= HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(prot, flags));
// kind 11:8
pte_bits |= HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_GENERIC_MEMORY);
address >>= NV_MMU_VER3_PTE_ADDRESS_SHIFT;
if (aperture == UVM_APERTURE_VID) {
// vid address 39:12
pte_bits |= HWVALUE64(_MMU_VER3, PTE, ADDRESS_VID, address);
}
else {
// sys/peer address 51:12
pte_bits |= HWVALUE64(_MMU_VER3, PTE, ADDRESS, address);
// peer id 63:61
if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
pte_bits |= HWVALUE64(_MMU_VER3, PTE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
}
return pte_bits;
}
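// As a concrete reference point, the page tree unit test added in this change
// (entry_test_hopper in uvm_page_tree_test.c) expects, for example,
// make_pte(UVM_APERTURE_SYS, 0x9999999999000, RW atomic, access counters
// disabled) == 0x999999999968D, and make_pte(UVM_APERTURE_PEER_1, 0xBBBBBBB000,
// read-only, cached) == 0x200000BBBBBBB663, i.e. the peer id lands in bits
// 63:61 as noted above.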
static NvU64 make_sked_reflected_pte_hopper(void)
{
return HWCONST64(_MMU_VER3, PTE, VALID, TRUE) |
HWVALUE64(_MMU_VER3, PTE, PCF, pte_pcf(UVM_PROT_READ_WRITE_ATOMIC, UVM_MMU_PTE_FLAGS_NONE)) |
HWVALUE64(_MMU_VER3, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
}
static NvU64 make_sparse_pte_hopper(void)
{
return HWCONST64(_MMU_VER3, PTE, VALID, FALSE) |
HWCONST64(_MMU_VER3, PTE, PCF, SPARSE);
}
static NvU64 unmapped_pte_hopper(NvU32 page_size)
{
// Setting PCF to NO_VALID_4KB_PAGE on an otherwise-zeroed big PTE causes
// the corresponding 4k PTEs to be ignored. This allows the invalidation of
// a mixed PDE range to be much faster.
if (page_size != UVM_PAGE_SIZE_64K)
return 0;
// When VALID == 0, GMMU still reads the PCF field, which indicates the PTE
// is sparse (make_sparse_pte_hopper) or an unmapped big-page PTE.
return HWCONST64(_MMU_VER3, PTE, VALID, FALSE) |
HWCONST64(_MMU_VER3, PTE, PCF, NO_VALID_4KB_PAGE);
}
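// For reference, the unit test in this change expects unmapped_pte(64K) to be
// 0x18: with the PTE field layout used above (VALID in bit 0, PCF in bits 7:3)
// that is simply VALID = FALSE combined with the NO_VALID_4KB_PAGE PCF
// encoding, and 0 for every other page size.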
static NvU64 poisoned_pte_hopper(void)
{
// An invalid PTE won't be fatal from faultable units like SM, which is the
// most likely source of bad PTE accesses.
// Engines with priv accesses won't fault on the priv PTE, so add a backup
// mechanism using an impossible memory address. MMU will trigger an
// interrupt when it detects a bad physical address, i.e., a physical
// address > GPU memory size.
//
// This address has to fit within 38 bits (max address width of vidmem) and
// be aligned to page_size.
NvU64 phys_addr = 0x2bad000000ULL;
NvU64 pte_bits = make_pte_hopper(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
{
NvU64 pde_bits = 0;
if (phys_alloc != NULL) {
NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_PDE_ADDRESS_SHIFT;
pde_bits |= HWCONST64(_MMU_VER3, PDE, IS_PTE, FALSE);
switch (phys_alloc->addr.aperture) {
case UVM_APERTURE_SYS:
pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, SYSTEM_COHERENT_MEMORY);
break;
case UVM_APERTURE_VID:
pde_bits |= HWCONST64(_MMU_VER3, PDE, APERTURE, VIDEO_MEMORY);
break;
default:
UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
break;
}
// PCF (permission control flags) 5:3
// Hopper GPUs on ATS-enabled systems perform a parallel lookup on both
// ATS and GMMU page tables. For managed memory we need to prevent this
// parallel lookup since we would not get any GPU fault if the CPU has
// a valid mapping. Also, for external ranges that are known to be
// mapped entirely on the GMMU page table we can skip the ATS lookup
// for performance reasons. Parallel ATS lookup is disabled in PDE1
// (depth 3) and, therefore, it applies to the underlying 512MB VA
// range.
//
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
// This is fine because CUDA ensures that all managed and external
// allocations are properly compartmentalized in 512MB-aligned VA
// regions. For cudaHostRegister CUDA cannot control the VA range, but
// we rely on ATS for those allocations so they can't choose the
// ATS_NOT_ALLOWED mode.
//
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
// to PTEs.
if (depth == 3 && g_uvm_global.ats.enabled)
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
else
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
// address 51:12
pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
}
return pde_bits;
}
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
{
NvU64 pde_bits = 0;
if (phys_alloc != NULL) {
NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_DUAL_PDE_ADDRESS_BIG_SHIFT;
switch (phys_alloc->addr.aperture) {
case UVM_APERTURE_SYS:
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, SYSTEM_COHERENT_MEMORY);
break;
case UVM_APERTURE_VID:
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY);
break;
default:
UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
break;
}
// PCF (permission control flags) 5:3
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 51:8
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
}
return pde_bits;
}
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
{
NvU64 pde_bits = 0;
if (phys_alloc != NULL) {
NvU64 address = phys_alloc->addr.address >> NV_MMU_VER3_DUAL_PDE_ADDRESS_SHIFT;
switch (phys_alloc->addr.aperture) {
case UVM_APERTURE_SYS:
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, SYSTEM_COHERENT_MEMORY);
break;
case UVM_APERTURE_VID:
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
break;
default:
UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);
break;
}
// PCF (permission control flags) 69:67 [5:3]
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 115:76 [51:12]
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
}
return pde_bits;
}
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
NvU32 entry_count = entries_per_index_hopper(depth);
NvU64 *entry_bits = (NvU64 *)entry;
if (entry_count == 1) {
*entry_bits = single_pde_hopper(*phys_allocs, depth);
}
else if (entry_count == 2) {
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
// This entry applies to the whole dual PDE but is stored in the lower
// bits
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
}
else {
UVM_ASSERT_MSG(0, "Invalid number of entries per index: %d\n", entry_count);
}
}
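// For reference, the dual-PDE packing above is exactly what the page tree unit
// test in this change checks: with a big-page table in sysmem at
// 0x9999999999900 and a 4K table in vidmem at 0xBBBBBBB000, make_pde(..., 4) is
// expected to produce entry_bits[MMU_BIG] == 0x999999999991C and
// entry_bits[MMU_SMALL] == 0xBBBBBBB01A.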
static uvm_mmu_mode_hal_t hopper_mmu_mode_hal;
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size)
{
static bool initialized = false;
UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
// TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
// 128K big page size for Pascal+ GPUs
if (big_page_size == UVM_PAGE_SIZE_128K)
return NULL;
if (!initialized) {
uvm_mmu_mode_hal_t *ampere_mmu_mode_hal = uvm_hal_mmu_mode_ampere(big_page_size);
UVM_ASSERT(ampere_mmu_mode_hal);
// The assumption made is that arch_hal->mmu_mode_hal() will be called
// under the global lock the first time, so check it here.
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
hopper_mmu_mode_hal = *ampere_mmu_mode_hal;
hopper_mmu_mode_hal.entry_size = entry_size_hopper;
hopper_mmu_mode_hal.index_bits = index_bits_hopper;
hopper_mmu_mode_hal.entries_per_index = entries_per_index_hopper;
hopper_mmu_mode_hal.entry_offset = entry_offset_hopper;
hopper_mmu_mode_hal.num_va_bits = num_va_bits_hopper;
hopper_mmu_mode_hal.allocation_size = allocation_size_hopper;
hopper_mmu_mode_hal.page_table_depth = page_table_depth_hopper;
hopper_mmu_mode_hal.make_pte = make_pte_hopper;
hopper_mmu_mode_hal.make_sked_reflected_pte = make_sked_reflected_pte_hopper;
hopper_mmu_mode_hal.make_sparse_pte = make_sparse_pte_hopper;
hopper_mmu_mode_hal.unmapped_pte = unmapped_pte_hopper;
hopper_mmu_mode_hal.poisoned_pte = poisoned_pte_hopper;
hopper_mmu_mode_hal.make_pde = make_pde_hopper;
initialized = true;
}
return &hopper_mmu_mode_hal;
}
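// Typical usage (a sketch; the actual call site lives in the per-GPU arch HAL
// and is not shown here): the arch HAL resolves the mode once under the global
// lock, e.g.
//
//     uvm_mmu_mode_hal_t *hal = uvm_hal_mmu_mode_hopper(UVM_PAGE_SIZE_64K);
//     UVM_ASSERT(hal->num_va_bits() == 57);
//
// A request for a 128K big page size returns NULL, which callers must treat as
// an unsupported configuration.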
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id)
{
switch (client_id) {
case NV_PFAULT_CLIENT_GPC_RAST:
case NV_PFAULT_CLIENT_GPC_GCC:
case NV_PFAULT_CLIENT_GPC_GPCCS:
return UVM_HOPPER_GPC_UTLB_ID_RGG;
case NV_PFAULT_CLIENT_GPC_T1_0:
return UVM_HOPPER_GPC_UTLB_ID_LTP0;
case NV_PFAULT_CLIENT_GPC_T1_1:
case NV_PFAULT_CLIENT_GPC_PE_0:
case NV_PFAULT_CLIENT_GPC_TPCCS_0:
return UVM_HOPPER_GPC_UTLB_ID_LTP1;
case NV_PFAULT_CLIENT_GPC_T1_2:
return UVM_HOPPER_GPC_UTLB_ID_LTP2;
case NV_PFAULT_CLIENT_GPC_T1_3:
case NV_PFAULT_CLIENT_GPC_PE_1:
case NV_PFAULT_CLIENT_GPC_TPCCS_1:
return UVM_HOPPER_GPC_UTLB_ID_LTP3;
case NV_PFAULT_CLIENT_GPC_T1_4:
return UVM_HOPPER_GPC_UTLB_ID_LTP4;
case NV_PFAULT_CLIENT_GPC_T1_5:
case NV_PFAULT_CLIENT_GPC_PE_2:
case NV_PFAULT_CLIENT_GPC_TPCCS_2:
return UVM_HOPPER_GPC_UTLB_ID_LTP5;
case NV_PFAULT_CLIENT_GPC_T1_6:
return UVM_HOPPER_GPC_UTLB_ID_LTP6;
case NV_PFAULT_CLIENT_GPC_T1_7:
case NV_PFAULT_CLIENT_GPC_PE_3:
case NV_PFAULT_CLIENT_GPC_TPCCS_3:
return UVM_HOPPER_GPC_UTLB_ID_LTP7;
case NV_PFAULT_CLIENT_GPC_T1_8:
return UVM_HOPPER_GPC_UTLB_ID_LTP8;
case NV_PFAULT_CLIENT_GPC_T1_9:
case NV_PFAULT_CLIENT_GPC_PE_4:
case NV_PFAULT_CLIENT_GPC_TPCCS_4:
return UVM_HOPPER_GPC_UTLB_ID_LTP9;
case NV_PFAULT_CLIENT_GPC_T1_10:
return UVM_HOPPER_GPC_UTLB_ID_LTP10;
case NV_PFAULT_CLIENT_GPC_T1_11:
case NV_PFAULT_CLIENT_GPC_PE_5:
case NV_PFAULT_CLIENT_GPC_TPCCS_5:
return UVM_HOPPER_GPC_UTLB_ID_LTP11;
case NV_PFAULT_CLIENT_GPC_T1_12:
return UVM_HOPPER_GPC_UTLB_ID_LTP12;
case NV_PFAULT_CLIENT_GPC_T1_13:
case NV_PFAULT_CLIENT_GPC_PE_6:
case NV_PFAULT_CLIENT_GPC_TPCCS_6:
return UVM_HOPPER_GPC_UTLB_ID_LTP13;
case NV_PFAULT_CLIENT_GPC_T1_14:
return UVM_HOPPER_GPC_UTLB_ID_LTP14;
case NV_PFAULT_CLIENT_GPC_T1_15:
case NV_PFAULT_CLIENT_GPC_PE_7:
case NV_PFAULT_CLIENT_GPC_TPCCS_7:
return UVM_HOPPER_GPC_UTLB_ID_LTP15;
case NV_PFAULT_CLIENT_GPC_T1_16:
return UVM_HOPPER_GPC_UTLB_ID_LTP16;
case NV_PFAULT_CLIENT_GPC_T1_17:
case NV_PFAULT_CLIENT_GPC_PE_8:
case NV_PFAULT_CLIENT_GPC_TPCCS_8:
return UVM_HOPPER_GPC_UTLB_ID_LTP17;
default:
UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
}
return 0;
}
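The index widths above account exactly for Hopper's 57-bit virtual address space. The following standalone sketch (not driver code; it only reuses the per-level widths from index_bits_hopper() and the 4K/64K page offsets from the layout comment) checks that arithmetic for both leaf page sizes:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    // Index widths from the root down: PDE4, PDE3, PDE2, PDE1, PDE0.
    const int pde_bits[] = {1, 9, 9, 9, 8};
    int va_bits_4k = 12;   // 4K page offset
    int va_bits_64k = 16;  // 64K page offset
    int i;

    for (i = 0; i < 5; i++) {
        va_bits_4k += pde_bits[i];
        va_bits_64k += pde_bits[i];
    }

    va_bits_4k += 9;   // PTE_4K index, VA bits 20:12
    va_bits_64k += 5;  // PTE_64K index, VA bits 20:16

    // Both leaf sizes cover the same 57-bit VA reported by num_va_bits_hopper().
    assert(va_bits_4k == 57 && va_bits_64k == 57);
    printf("4K path: %d bits, 64K path: %d bits\n", va_bits_4k, va_bits_64k);
    return 0;
}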

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2021 NVIDIA Corporation
Copyright (c) 2020-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -46,7 +46,7 @@ static NV_STATUS test_semaphore_alloc_uvm_rm_mem(uvm_gpu_t *gpu, const size_t si
uvm_rm_mem_t *mem = NULL;
NvU64 gpu_va;
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, &mem);
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
TEST_NV_CHECK_RET(status);
gpu_va = uvm_rm_mem_get_gpu_uvm_va(mem, gpu);

View File

@ -88,16 +88,12 @@
#include "nv-kthread-q.h" #include "nv-kthread-q.h"
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT) #if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
#define UVM_THREAD_AFFINITY_SUPPORTED() 1 #define UVM_THREAD_AFFINITY_SUPPORTED() 1
#else #else
#define UVM_THREAD_AFFINITY_SUPPORTED() 0 #define UVM_THREAD_AFFINITY_SUPPORTED() 0
#endif #endif
// The ARM arch lacks support for cpumask_of_node() until kernel 4.7. It was // The ARM arch lacks support for cpumask_of_node() until kernel 4.7. It was
// added via commit1a2db300348b ("arm64, numa: Add NUMA support for arm64 // added via commit1a2db300348b ("arm64, numa: Add NUMA support for arm64
// platforms.") Callers should either check UVM_THREAD_AFFINITY_SUPPORTED() // platforms.") Callers should either check UVM_THREAD_AFFINITY_SUPPORTED()
@ -112,16 +108,12 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#endif #endif
} }
#if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT) #if defined(CONFIG_HMM_MIRROR) && defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MAKE_DEVICE_EXCLUSIVE_RANGE_PRESENT)
#define UVM_IS_CONFIG_HMM() 1 #define UVM_IS_CONFIG_HMM() 1
#else #else
#define UVM_IS_CONFIG_HMM() 0 #define UVM_IS_CONFIG_HMM() 0
#endif #endif
// Various issues prevent us from using mmu_notifiers in older kernels. These // Various issues prevent us from using mmu_notifiers in older kernels. These
// include: // include:
// - ->release being called under RCU instead of SRCU: fixed by commit // - ->release being called under RCU instead of SRCU: fixed by commit
@ -137,21 +129,12 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
// //
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e, // The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13). // v3.19 (2014-11-13).
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE) #if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1 #define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else #else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0 #define UVM_CAN_USE_MMU_NOTIFIERS() 0
#endif #endif
// See bug 1707453 for further details about setting the minimum kernel version. // See bug 1707453 for further details about setting the minimum kernel version.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
# error This driver does not support kernels older than 2.6.32! # error This driver does not support kernels older than 2.6.32!
@ -543,7 +526,6 @@ typedef struct
struct mem_cgroup *old_memcg; struct mem_cgroup *old_memcg;
} uvm_memcg_context_t; } uvm_memcg_context_t;
// cgroup support requires set_active_memcg(). set_active_memcg() is an // cgroup support requires set_active_memcg(). set_active_memcg() is an
// inline function that requires int_active_memcg per-cpu symbol when called // inline function that requires int_active_memcg per-cpu symbol when called
// from interrupt context. int_active_memcg is only exported by commit // from interrupt context. int_active_memcg is only exported by commit
@ -585,29 +567,13 @@ typedef struct
} }
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg #endif // NV_IS_EXPORT_SYMBOL_PRESENT_int_active_memcg
#if defined(NVCPU_X86) || defined(NVCPU_X86_64)
#include <asm/pgtable_types.h>
#endif
#if !defined(PAGE_KERNEL_NOENC)
#define PAGE_KERNEL_NOENC PAGE_KERNEL
#endif
// Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7). // Commit 1dff8083a024650c75a9c961c38082473ceae8cf (v4.7).
// //

View File

@ -27,12 +27,8 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 26);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
@ -48,14 +44,8 @@ const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_API);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_RM_GPUS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_BLOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_CHUNK_MAPPING);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PAGE_TREE);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PUSH);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_PMM_PMA);

View File

@ -276,16 +276,6 @@
// Operations not allowed while holding the lock:
// - GPU memory allocation which can evict memory (would require nesting
// block locks)
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
// gpu->sysmem_mappings.bitlock)
// Order: UVM_LOCK_ORDER_CHUNK_MAPPING
@ -320,20 +310,6 @@
// Operations not allowed while holding this lock
// - GPU memory allocation which can evict
//
// - Concurrent push semaphore
// Order: UVM_LOCK_ORDER_PUSH
// Semaphore (uvm_semaphore_t)
@ -408,14 +384,8 @@ typedef enum
UVM_LOCK_ORDER_RM_API,
UVM_LOCK_ORDER_RM_GPUS,
UVM_LOCK_ORDER_VA_BLOCK,
UVM_LOCK_ORDER_CHUNK_MAPPING,
UVM_LOCK_ORDER_PAGE_TREE,
UVM_LOCK_ORDER_PUSH,
UVM_LOCK_ORDER_PMM,
UVM_LOCK_ORDER_PMM_PMA,

View File

@ -636,19 +636,6 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
{
uvm_gpu_t *owning_gpu;
// This is a local or peer allocation, so the owning GPU must have been
// registered.
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2022 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -233,6 +233,19 @@ void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry, NvU64 pushbuffer_v
*fifo_entry = fifo_entry_value;
}
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry)
{
UVM_ASSERT(!uvm_global_is_suspended());
// A NOOP control GPFIFO does not require a GP_ENTRY0.
*fifo_entry = (NvU64)(HWVALUE(A16F, GP_ENTRY1, LENGTH, 0) | HWCONST(A16F, GP_ENTRY1, OPCODE, NOP)) << 32;
}
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va)
{
UVM_ASSERT_MSG(false, "host set_gpfifo_pushbuffer_segment_base called on Maxwell GPU\n");
}
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put)
{
UVM_GPU_WRITE_ONCE(*channel->channel_info.gpPut, gpu_put);

View File

@ -71,11 +71,9 @@ static bool sysmem_can_be_mapped(uvm_mem_t *sysmem)
{
UVM_ASSERT(uvm_mem_is_sysmem(sysmem));
// If SEV is enabled, only unprotected memory can be mapped
if (g_uvm_global.sev_enabled)
return uvm_mem_is_sysmem_dma(sysmem);
return true;
}
@ -451,11 +449,6 @@
return gfp_flags;
}
// There is a tighter coupling between allocation and mapping because of the
// allocator UVM must use. Hence, this function does the equivalent of
// uvm_mem_map_gpu_phys().
@ -731,10 +724,8 @@ static NV_STATUS mem_map_cpu_to_sysmem_kernel(uvm_mem_t *mem)
pages[page_index] = mem_cpu_page(mem, page_index * PAGE_SIZE);
}
if (g_uvm_global.sev_enabled)
prot = PAGE_KERNEL_NOENC;
mem->kernel.cpu_addr = vmap(pages, num_pages, VM_MAP, prot);
@ -1068,18 +1059,6 @@ static NV_STATUS mem_map_gpu(uvm_mem_t *mem,
page_size = mem_pick_gpu_page_size(mem, gpu, tree);
UVM_ASSERT_MSG(uvm_mmu_page_size_supported(tree, page_size), "page_size 0x%x\n", page_size);
status = uvm_page_table_range_vec_create(tree,
gpu_va,
mem->physical_allocation_size,

View File

@ -161,10 +161,6 @@ struct uvm_mem_struct
// lifetime of the GPU. For CPU allocations there is no lifetime limitation.
uvm_gpu_t *backing_gpu;
uvm_gpu_t *dma_owner;
// Size of the physical chunks.
@ -396,14 +392,12 @@ static NV_STATUS uvm_mem_alloc_sysmem_and_map_cpu_kernel(NvU64 size, struct mm_s
return NV_OK;
}
// Helper for allocating sysmem DMA and mapping it on the CPU. This is useful
// for certain systems where the main system memory is encrypted
// (e.g., AMD SEV) and cannot be read from IO devices unless specially
// allocated using the DMA APIs.
//
// See uvm_mem_alloc()
static NV_STATUS uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(NvU64 size,
uvm_gpu_t *gpu,
struct mm_struct *mm,

View File

@ -25,9 +25,6 @@
#include "uvm_kvmalloc.h" #include "uvm_kvmalloc.h"
#include "uvm_mem.h" #include "uvm_mem.h"
#include "uvm_push.h" #include "uvm_push.h"
#include "uvm_test.h" #include "uvm_test.h"
#include "uvm_test_ioctl.h" #include "uvm_test_ioctl.h"
#include "uvm_va_space.h" #include "uvm_va_space.h"
@ -46,14 +43,10 @@ static NvU32 first_page_size(NvU32 page_sizes)
static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem) static inline NV_STATUS __alloc_map_sysmem(NvU64 size, uvm_gpu_t *gpu, uvm_mem_t **sys_mem)
{ {
if (g_uvm_global.sev_enabled)
return uvm_mem_alloc_sysmem_dma_and_map_cpu_kernel(size, gpu, current->mm, sys_mem);
else
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
return uvm_mem_alloc_sysmem_and_map_cpu_kernel(size, current->mm, sys_mem);
} }
static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem) static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
@ -94,12 +87,8 @@ static NV_STATUS check_accessible_from_gpu(uvm_gpu_t *gpu, uvm_mem_t *mem)
sys_mem_gpu_address = uvm_mem_gpu_address_virtual_kernel(sys_mem, gpu); sys_mem_gpu_address = uvm_mem_gpu_address_virtual_kernel(sys_mem, gpu);
sys_mem_gpu_address.address += offset; sys_mem_gpu_address.address += offset;
should_use_pa = uvm_channel_is_privileged(push.channel); should_use_pa = uvm_channel_is_privileged(push.channel);
if (should_use_pa) { if (should_use_pa) {
mem_gpu_address = uvm_mem_gpu_address_physical(mem, gpu, offset, size_this_time); mem_gpu_address = uvm_mem_gpu_address_physical(mem, gpu, offset, size_this_time);
} }
@ -348,10 +337,8 @@ error:
static bool should_test_page_size(size_t alloc_size, NvU32 page_size) static bool should_test_page_size(size_t alloc_size, NvU32 page_size)
{ {
if (g_uvm_global.sev_enabled)
return false;
if (g_uvm_global.num_simulated_devices == 0) if (g_uvm_global.num_simulated_devices == 0)
return true; return true;
@ -543,45 +530,6 @@ done:
return status; return status;
} }
static NV_STATUS test_basic(uvm_va_space_t *va_space) static NV_STATUS test_basic(uvm_va_space_t *va_space)
{ {
uvm_gpu_t *gpu; uvm_gpu_t *gpu;
@ -591,9 +539,6 @@ static NV_STATUS test_basic(uvm_va_space_t *va_space)
for_each_va_space_gpu(gpu, va_space) { for_each_va_space_gpu(gpu, va_space) {
TEST_CHECK_RET(test_basic_vidmem(gpu) == NV_OK); TEST_CHECK_RET(test_basic_vidmem(gpu) == NV_OK);
TEST_CHECK_RET(test_basic_sysmem_dma(gpu) == NV_OK); TEST_CHECK_RET(test_basic_sysmem_dma(gpu) == NV_OK);
} }
return NV_OK; return NV_OK;

View File

@ -35,15 +35,9 @@
#include "uvm_va_space.h" #include "uvm_va_space.h"
// The page tree has 6 levels on Hopper+ GPUs, and the root is never freed by a
// normal 'put' operation which leaves a maximum of 5 levels.
#define MAX_OPERATION_DEPTH 5
// The page tree has 5 levels on Pascal, and the root is never freed by a normal
// 'put' operation which leaves a maximum of 4 levels
#define MAX_OPERATION_DEPTH 4
// Wrappers for push begin handling channel_manager not being there when running
// the page tree unit test
@ -720,11 +714,7 @@ error:
// default | vidmem || vidmem | false
// default | sysmem || sysmem | false
//
// (1) When SEV mode is enabled, the fallback path is disabled.
// (1) The fallback to sysmem is always enabled.
// //
// In SR-IOV heavy the page tree must be in vidmem, to prevent guest drivers
// from updating GPU page tables without hypervisor knowledge.
// from updating GPU page tables without hypervisor knowledge. // from updating GPU page tables without hypervisor knowledge.
@ -753,12 +743,8 @@ static void page_tree_set_location(uvm_page_tree_t *tree, uvm_aperture_t locatio
if (page_table_aperture == UVM_APERTURE_DEFAULT) { if (page_table_aperture == UVM_APERTURE_DEFAULT) {
tree->location = UVM_APERTURE_VID; tree->location = UVM_APERTURE_VID;
// See the comment (1) above.
tree->location_sys_fallback = !g_uvm_global.sev_enabled;
tree->location_sys_fallback = true;
} }
else { else {
tree->location = page_table_aperture; tree->location = page_table_aperture;
@ -1803,11 +1789,7 @@ static void destroy_identity_mapping(uvm_gpu_identity_mapping_t *mapping)
bool uvm_mmu_gpu_needs_static_vidmem_mapping(uvm_gpu_t *gpu)
{
return false;
}
bool uvm_mmu_gpu_needs_dynamic_vidmem_mapping(uvm_gpu_t *gpu)

View File

@ -37,27 +37,25 @@
// Memory layout of UVM's kernel VA space.
// The following memory regions are not to scale.
// Hopper:
// +----------------+ 128PB
// | |
// | (not used) |
// | |
// ------------------
// |uvm_mem_t(128GB)| (uvm_mem_va_size)
// ------------------ 64PB + 384TB (uvm_mem_va_base)
// | |
// | (not used) |
// | |
// ------------------ 64PB + 8TB
// |peer ident. maps|
// |32 * 256GB = 8TB| ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE
// ------------------ 64PB
// | |
// | rm_mem(64PB) | (rm_va_size)
// | |
// +----------------+ 0 (rm_va_base)
//
// Pascal-Ampere:
// +----------------+ 512TB
@ -74,13 +72,13 @@
// | |
// | (not used) |
// | |
// ------------------ 132TB + 128GB (UVM_GPU_MAX_PHYS_MEM)
// ------------------ 136TB + 256GB (UVM_GPU_MAX_PHYS_MEM)
// | vidmem |
// | flat mapping | ==> UVM_GPU_MAX_PHYS_MEM
// | (128GB) |
// | (256GB) |
// ------------------ 132TB (flat_vidmem_va_base)
// ------------------ 136TB (flat_vidmem_va_base)
// |peer ident. maps|
// |32 * 128GB = 4TB| ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE
// |32 * 256GB = 8TB| ==> NV_MAX_DEVICES * UVM_PEER_IDENTITY_VA_SIZE
// ------------------ 128TB
// | |
// | rm_mem(128TB) | (rm_va_size)
@ -105,7 +103,7 @@
// +----------------+ 0 (rm_va_base)
// Maximum memory of any GPU.
#define UVM_GPU_MAX_PHYS_MEM (128ull * 1024 * 1024 * 1024)
#define UVM_GPU_MAX_PHYS_MEM (256ull * 1024 * 1024 * 1024)
// The size of VA that should be reserved per peer identity mapping.
// This should be at least the maximum amount of memory of any GPU.

View File

@ -43,11 +43,9 @@
// AMPERE_*
#include "clc56f.h"
#include "clc6b5.h"
// HOPPER_*
#include "clc8b5.h"
#include "clc86f.h"
// ARCHITECTURE_*
#include "ctrl2080mc.h"
@ -407,33 +405,31 @@ static NV_STATUS alloc_64k_memory(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS alloc_64k_memory_57b_va(uvm_gpu_t *gpu)
{
uvm_page_tree_t tree;
uvm_page_table_range_t range;
NvLength size = 64 * 1024;
MEM_NV_CHECK_RET(test_page_tree_init(gpu, BIG_PAGE_SIZE_PASCAL, &tree), NV_OK);
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, 0x100000000000000ULL, size, &range), NV_OK);
TEST_CHECK_RET(range.entry_count == 1);
TEST_CHECK_RET(range.table->depth == 5);
TEST_CHECK_RET(range.start_index == 0);
TEST_CHECK_RET(range.page_size == UVM_PAGE_SIZE_64K);
TEST_CHECK_RET(tree.root->ref_count == 1);
TEST_CHECK_RET(tree.root->entries[1]->ref_count == 1);
TEST_CHECK_RET(tree.root->entries[1]->entries[0]->ref_count == 1);
TEST_CHECK_RET(tree.root->entries[1]->entries[0]->entries[0]->ref_count == 1);
TEST_CHECK_RET(tree.root->entries[1]->entries[0]->entries[0]->entries[0]->ref_count == 1);
TEST_CHECK_RET(tree.root->entries[1]->entries[0]->entries[0]->entries[0]->entries[0]->ref_count == 1);
TEST_CHECK_RET(range.table == tree.root->entries[1]->entries[0]->entries[0]->entries[0]->entries[0]);
uvm_page_tree_put_ptes(&tree, &range);
UVM_ASSERT(tree.root->ref_count == 0);
uvm_page_tree_deinit(&tree);
return NV_OK;
}
static NV_STATUS alloc_adjacent_64k_memory(uvm_gpu_t *gpu)
{
@ -1513,20 +1509,18 @@ static NV_STATUS entry_test_page_size_ampere(uvm_gpu_t *gpu, size_t page_size)
return entry_test_page_size_volta(gpu, page_size);
}
static NV_STATUS entry_test_page_size_hopper(uvm_gpu_t *gpu, size_t page_size)
{
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Page table entries
if (page_size == UVM_PAGE_SIZE_64K)
TEST_CHECK_RET(hal->unmapped_pte(page_size) == 0x18);
else
TEST_CHECK_RET(hal->unmapped_pte(page_size) == 0);
return NV_OK;
}
typedef NV_STATUS (*entry_test_page_size_func)(uvm_gpu_t *gpu, size_t page_size);
@ -1783,109 +1777,107 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
return NV_OK;
}
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
// big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBBB00LL);
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
// Cleared PDEs work as expected for big and small PDEs.
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs, uncached ATS allowed.
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
// Dual PDEs, uncached.
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
// access counters disabled.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D);
// change to cached.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
0x9999999999685);
// enable access counters.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605);
// remove atomic
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645);
// read only
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665);
// local video
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_VID,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661);
// peer 1
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_PEER_1,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663);
// sparse
TEST_CHECK_RET(hal->make_sparse_pte() == 0x8);
// sked reflected
TEST_CHECK_RET(hal->make_sked_reflected_pte() == 0xF09);
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
return NV_OK;
}
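// Decoding one of the expected values above against the PTE field layout in
// uvm_hopper_mmu.c (valid 0:0, aperture 2:1, PCF 7:3, kind 11:8): the low 12
// bits of 0x999999999968D are 0x68D, i.e. VALID = 1, aperture code 2 (sysmem),
// PCF = 0x11 (RW atomic, uncached, access counters disabled per the call above)
// and kind = 6 (generic memory); the remaining bits carry the 4K-aligned
// sysmem address 0x9999999999000 >> 12.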
static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
{
@ -2121,15 +2113,13 @@ static NV_STATUS fake_gpu_init_ampere(uvm_gpu_t *fake_gpu)
fake_gpu);
}
static NV_STATUS fake_gpu_init_hopper(uvm_gpu_t *fake_gpu)
{
return fake_gpu_init(HOPPER_CHANNEL_GPFIFO_A,
HOPPER_DMA_COPY_A,
NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GH100,
fake_gpu);
}
static NV_STATUS maxwell_test_page_tree(uvm_gpu_t *maxwell)
{
@ -2271,17 +2261,15 @@ static NV_STATUS ampere_test_page_tree(uvm_gpu_t *ampere)
return NV_OK;
}
static NV_STATUS hopper_test_page_tree(uvm_gpu_t *hopper)
{
TEST_CHECK_RET(fake_gpu_init_hopper(hopper) == NV_OK);
MEM_NV_CHECK_RET(entry_test_hopper(hopper, entry_test_page_size_hopper), NV_OK);
MEM_NV_CHECK_RET(alloc_64k_memory_57b_va(hopper), NV_OK);
return NV_OK;
}
NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *filp)
{
@ -2315,9 +2303,7 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(hopper_test_page_tree(gpu), done);
done:
fake_tlb_invals_free();

View File

@ -653,7 +653,7 @@ done:
static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes);
// Destroy the thrashing detection struct for the given block
static void thrashing_info_destroy(uvm_va_block_t *va_block)
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block)
{
block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);
@ -687,7 +687,7 @@ void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t
if (!va_block)
return;
thrashing_info_destroy(va_block);
uvm_perf_thrashing_info_destroy(va_block);
}
// Sanity checks of the thrashing tracking state
@ -1118,9 +1118,9 @@ static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_bl
// Unmap remote mappings from all processors on the pinned pages // Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages. // described by region and block_thrashing->pinned pages.
static NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block, NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context, uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region) uvm_va_block_region_t region)
{ {
block_thrashing_info_t *block_thrashing; block_thrashing_info_t *block_thrashing;
uvm_processor_mask_t unmap_processors; uvm_processor_mask_t unmap_processors;
@ -2111,7 +2111,7 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
block_context, block_context,
va_block_region)); va_block_region));
thrashing_info_destroy(va_block); uvm_perf_thrashing_info_destroy(va_block);
uvm_mutex_unlock(&va_block->lock); uvm_mutex_unlock(&va_block->lock);
@ -2123,6 +2123,15 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
} }
} }
} }
status = uvm_hmm_clear_thrashing_policy(va_space);
// Re-enable thrashing on failure to avoid getting asserts
// about having state while thrashing is disabled
if (status != NV_OK) {
va_space_thrashing->params.enable = true;
goto done_unlock_va_space;
}
}
done_unlock_va_space:
View File
@@ -103,4 +103,13 @@ NV_STATUS uvm_perf_thrashing_register_gpu(uvm_va_space_t *va_space, uvm_gpu_t *g
void uvm_perf_thrashing_stop(uvm_va_space_t *va_space);
void uvm_perf_thrashing_unload(uvm_va_space_t *va_space);
// Destroy the thrashing detection struct for the given block.
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
// Unmap remote mappings from all processors on the pinned pages
// described by region and block_thrashing->pinned pages.
NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_va_block_region_t region);
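The two declarations above expose per-block teardown helpers that were previously private to the thrashing module. A minimal caller sketch follows; it mirrors the order used by uvm_test_set_page_thrashing_policy() (unmap pinned remote mappings, then drop the tracking state). The helper name example_clear_block_thrashing is hypothetical and not part of this change.

static NV_STATUS example_clear_block_thrashing(uvm_va_block_t *va_block,
                                               uvm_va_block_context_t *block_context,
                                               uvm_va_block_region_t region)
{
    NV_STATUS status;

    uvm_mutex_lock(&va_block->lock);

    // Drop any remote mappings pinned by the thrashing mitigation first.
    status = unmap_remote_pinned_pages_from_all_processors(va_block, block_context, region);

    // Then free the per-block thrashing tracking state.
    uvm_perf_thrashing_info_destroy(va_block);

    uvm_mutex_unlock(&va_block->lock);
    return status;
}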
#endif
View File
@@ -173,9 +173,6 @@
#include "uvm_test.h"
#include "uvm_linux.h"

static int uvm_global_oversubscription = 1;
module_param(uvm_global_oversubscription, int, S_IRUGO);
MODULE_PARM_DESC(uvm_global_oversubscription, "Enable (1) or disable (0) global oversubscription support.");
@@ -245,21 +242,11 @@ const char *uvm_pmm_gpu_memory_type_string(uvm_pmm_gpu_memory_type_t type)
{
switch (type) {
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_USER);
UVM_ENUM_STRING_CASE(UVM_PMM_GPU_MEMORY_TYPE_KERNEL);
UVM_ENUM_STRING_DEFAULT();
}

BUILD_BUG_ON(UVM_PMM_GPU_MEMORY_TYPE_COUNT != 2);
}

const char *uvm_pmm_gpu_chunk_state_string(uvm_pmm_gpu_chunk_state_t state)
@@ -461,31 +448,13 @@ bool uvm_pmm_gpu_memory_type_is_user(uvm_pmm_gpu_memory_type_t type)
UVM_ASSERT(type < UVM_PMM_GPU_MEMORY_TYPE_COUNT);

switch (type) {
case UVM_PMM_GPU_MEMORY_TYPE_USER:
return true;
default:
return false;
}
}

static void uvm_gpu_chunk_set_in_eviction(uvm_gpu_chunk_t *chunk, bool in_eviction)
{
UVM_ASSERT(uvm_pmm_gpu_memory_type_is_user(chunk->type));
@@ -545,20 +514,7 @@ void uvm_pmm_gpu_sync(uvm_pmm_gpu_t *pmm)
static uvm_pmm_gpu_memory_type_t pmm_squash_memory_type(uvm_parent_gpu_t *parent_gpu, uvm_pmm_gpu_memory_type_t type)
{
return type;
}

NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
@@ -647,10 +603,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
size_t i;
uvm_pmm_gpu_memory_type_t memory_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL;

status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
if (status != NV_OK)
return status;
@@ -1715,11 +1667,6 @@ void uvm_pmm_gpu_mark_root_chunk_unused(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chu
root_chunk_update_eviction_list(pmm, chunk, &pmm->root_chunks.va_block_unused);
}

static uvm_gpu_root_chunk_t *pick_root_chunk_to_evict(uvm_pmm_gpu_t *pmm)
{
uvm_gpu_chunk_t *chunk;
@@ -2192,13 +2139,6 @@ NV_STATUS alloc_root_chunk(uvm_pmm_gpu_t *pmm,
if (gpu->parent->numa_info.enabled)
flags |= UVM_PMM_ALLOC_FLAGS_DONT_BATCH;

if (!gpu->parent->rm_info.isSimulated &&
!(options.flags & UVM_PMA_ALLOCATE_PINNED) &&
!(flags & UVM_PMM_ALLOC_FLAGS_DONT_BATCH)) {
@@ -2449,14 +2389,6 @@ static bool check_chunk(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
UVM_ASSERT(uvm_global_id_equal(uvm_global_gpu_id_from_index(chunk->gpu_global_index), gpu->global_id));

if (chunk->state == UVM_PMM_GPU_CHUNK_STATE_IS_SPLIT)
UVM_ASSERT(chunk_size > uvm_chunk_find_first_size(chunk_sizes));
@@ -2766,13 +2698,6 @@ static bool uvm_pmm_should_inject_pma_eviction_error(uvm_pmm_gpu_t *pmm)
return false;
}

// See the documentation of pmaEvictPagesCb_t in pma.h for details of the
// expected semantics.
static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
@@ -3312,16 +3237,8 @@ NV_STATUS uvm_pmm_gpu_init(uvm_pmm_gpu_t *pmm)
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
const uvm_chunk_sizes_mask_t chunk_size_init[][UVM_PMM_GPU_MEMORY_TYPE_COUNT] =
{
{ gpu->parent->mmu_user_chunk_sizes, gpu->parent->mmu_kernel_chunk_sizes },
{ 0, uvm_mem_kernel_chunk_sizes(gpu)},
};
NV_STATUS status = NV_OK;
size_t i, j, k;
View File
@@ -95,22 +95,9 @@ typedef enum
// Memory type for backing user pages. On Pascal+ it can be evicted.
UVM_PMM_GPU_MEMORY_TYPE_USER,

// Memory type for internal UVM allocations. It cannot be evicted.
UVM_PMM_GPU_MEMORY_TYPE_KERNEL,

// Number of types - MUST BE LAST.
UVM_PMM_GPU_MEMORY_TYPE_COUNT
} uvm_pmm_gpu_memory_type_t;
@@ -447,10 +434,6 @@ NV_STATUS uvm_pmm_gpu_alloc(uvm_pmm_gpu_t *pmm,
//
// Internally calls uvm_pmm_gpu_alloc() and sets the state of all chunks to
// allocated on success.
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
@@ -466,11 +449,6 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
// If the memory returned by the PMM allocator cannot be physically addressed,
// the MMU interface provides user chunk mapping and unmapping functions
// (uvm_mmu_chunk_map/unmap) that enable virtual addressing.
static NV_STATUS uvm_pmm_gpu_alloc_user(uvm_pmm_gpu_t *pmm,
size_t num_chunks,
uvm_chunk_size_t chunk_size,
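For reference, a hypothetical allocation sketch using the kernel-type helper declared above. The parameter list beyond (pmm, num_chunks, chunk_size) is assumed from the uvm_pmm_gpu_alloc() call shown earlier, and the chunk-size and flag constants are assumptions rather than values taken from this diff.

static NV_STATUS example_alloc_kernel_chunks(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t **chunks)
{
    uvm_tracker_t tracker = UVM_TRACKER_INIT();
    NV_STATUS status;

    // Two 64K chunks of non-evictable, UVM-internal (kernel) memory.
    status = uvm_pmm_gpu_alloc_kernel(pmm, 2, UVM_CHUNK_SIZE_64K, UVM_PMM_ALLOC_FLAGS_NONE, chunks, &tracker);
    if (status != NV_OK)
        return status;

    // Wait for the allocation to be usable before handing the chunks out.
    return uvm_tracker_wait_deinit(&tracker);
}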
View File
@@ -101,24 +101,6 @@ typedef enum
static uvm_pmm_gpu_memory_type_t pmm_squash_memory_type(uvm_parent_gpu_t *parent_gpu, uvm_pmm_gpu_memory_type_t type)
{
return type;
}
@@ -324,14 +306,6 @@ static NV_STATUS gpu_mem_check(uvm_gpu_t *gpu,
NvU32 *verif_cpu_addr = uvm_mem_get_cpu_addr_kernel(verif_mem);
size_t i;

UVM_ASSERT(verif_mem->size >= size);
memset(verif_cpu_addr, 0, size);
@@ -537,11 +511,7 @@ static NV_STATUS basic_test(uvm_va_space_t *va_space, uvm_gpu_t *gpu,
if (mode == UvmTestPmmSanityModeBasic) {
first_memory_type = UVM_PMM_GPU_MEMORY_TYPE_USER;
last_memory_type = UVM_PMM_GPU_MEMORY_TYPE_USER;
first_free_pattern = BASIC_TEST_FREE_PATTERN_EVERY_N;
last_free_pattern = BASIC_TEST_FREE_PATTERN_EVERY_N;
}
@@ -897,10 +867,6 @@ NV_STATUS uvm_test_pmm_check_leak(UVM_TEST_PMM_CHECK_LEAK_PARAMS *params, struct
uvm_pmm_gpu_memory_type_t last_user_mode = UVM_PMM_GPU_MEMORY_TYPE_USER;
uvm_pmm_gpu_memory_type_t current_user_mode = first_user_mode;

if (params->alloc_limit < -1)
return NV_ERR_INVALID_ARGUMENT;
@@ -1036,10 +1002,6 @@ NV_STATUS uvm_test_pmm_async_alloc(UVM_TEST_PMM_ASYNC_ALLOC_PARAMS *params, stru
uvm_pmm_gpu_memory_type_t last_user_mode = UVM_PMM_GPU_MEMORY_TYPE_USER;
uvm_pmm_gpu_memory_type_t current_user_mode = first_user_mode;

uvm_va_space_down_read(va_space);
gpu = uvm_va_space_get_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu) {
@@ -1237,9 +1199,7 @@ static NV_STATUS test_indirect_peers(uvm_gpu_t *owning_gpu, uvm_gpu_t *accessing
if (!chunks)
return NV_ERR_NO_MEMORY;

UVM_ASSERT(!g_uvm_global.sev_enabled);

TEST_NV_CHECK_GOTO(uvm_mem_alloc_sysmem_and_map_cpu_kernel(UVM_CHUNK_SIZE_MAX, current->mm, &verif_mem), out);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(verif_mem, owning_gpu), out);
View File
@@ -259,7 +259,7 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
va_list args;
NV_STATUS status;

-status = uvm_channel_reserve(channel);
status = uvm_channel_reserve(channel, 1);
if (status != NV_OK)
return status;
@@ -270,6 +270,25 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
return status;
}
__attribute__ ((format(printf, 7, 8)))
NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *channel,
uvm_tracker_t *tracker,
uvm_push_t *push,
const char *filename,
const char *function,
int line,
const char *format, ...)
{
va_list args;
NV_STATUS status;
va_start(args, format);
status = push_begin_acquire_with_info(channel, tracker, push, filename, function, line, format, args);
va_end(args);
return status;
}
bool uvm_push_info_is_tracking_descriptions()
{
return uvm_debug_enable_push_desc != 0;
@@ -422,10 +441,6 @@ NvU64 *uvm_push_timestamp(uvm_push_t *push)
if (uvm_channel_is_ce(push->channel))
gpu->parent->ce_hal->semaphore_timestamp(push, address.address);
else
UVM_ASSERT_MSG(0, "Semaphore release timestamp on an unsupported channel.\n");
View File
@@ -35,10 +35,6 @@
// This is the storage required by a semaphore release.
#define UVM_PUSH_CE_END_SIZE 24

// The max amount of inline push data is limited by how much space can be jumped
// over with a single NOOP method.
#define UVM_PUSH_INLINE_DATA_MAX_SIZE (UVM_METHOD_COUNT_MAX * UVM_METHOD_SIZE)
@@ -200,6 +196,15 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
int line,
const char *format, ...);
// Internal helper for uvm_push_begin_on_reserved channel
__attribute__ ((format(printf, 7, 8)))
NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *channel,
uvm_tracker_t *tracker,
uvm_push_t *push,
const char *filename,
const char *function,
int line,
const char *format, ...);
// Begin a push on a channel of channel_type type
// Picks the first available channel. If all channels of the given type are
// busy, spin waits for one to become available.
@@ -207,20 +212,22 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
// Notably requires a description of the push to be provided. This is currently
// unused, but will be in the future for tracking push history.
//
// Locking: on success acquires the concurrent push semaphore until
// uvm_push_end()
#define uvm_push_begin(manager, type, push, format, ...) \
__uvm_push_begin_acquire_with_info((manager), (type), NULL, NULL, (push), \
__FILE__, __FUNCTION__, __LINE__, (format), ##__VA_ARGS__)

// Begin a push on a channel of channel_type type with dependencies in the
// tracker. This is equivalent to starting a push and acquiring the tracker, but
// in the future it will have the ability to pick the channel to do a push on in
// a smarter way based on its dependencies.
//
// Same as for uvm_push_acquire_tracker(), the tracker can be NULL. In this case
// this will be equivalent to just uvm_push_begin().
//
// Locking: on success acquires the concurrent push semaphore until
// uvm_push_end()
#define uvm_push_begin_acquire(manager, type, tracker, push, format, ...) \
__uvm_push_begin_acquire_with_info((manager), (type), NULL, (tracker), (push), \
__FILE__, __FUNCTION__, __LINE__, (format), ##__VA_ARGS__)
@@ -241,11 +248,20 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
// Begin a push on a specific channel
// If the channel is busy, spin wait for it to become available.
//
// Locking: on success acquires the concurrent push semaphore until
// uvm_push_end()
#define uvm_push_begin_on_channel(channel, push, format, ...) \
__uvm_push_begin_acquire_on_channel_with_info((channel), NULL, (push), \
__FILE__, __FUNCTION__, __LINE__, (format), ##__VA_ARGS__)
// Begin a push on a specific pre-reserved channel
//
// Locking: on success acquires the concurrent push semaphore until
// uvm_push_end()
#define uvm_push_begin_on_reserved_channel(channel, push, format, ...) \
__uvm_push_begin_acquire_on_reserved_channel_with_info((channel), NULL, (push), \
__FILE__, __FUNCTION__, __LINE__, (format), ##__VA_ARGS__)
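The reserved-channel variant above pairs with the new count argument to uvm_channel_reserve() seen earlier in this commit. A hypothetical usage sketch follows; the example_* name is not from the driver, and uvm_push_end_and_wait() is assumed to be available as in other UVM call sites.

static NV_STATUS example_push_on_reserved_channel(uvm_channel_t *channel)
{
    uvm_push_t push;
    NV_STATUS status;

    // Reserve a single GPFIFO entry up front.
    status = uvm_channel_reserve(channel, 1);
    if (status != NV_OK)
        return status;

    // The channel is already reserved, so this begin variant does not reserve again.
    status = uvm_push_begin_on_reserved_channel(channel, &push, "example push");
    if (status != NV_OK)
        return status;

    return uvm_push_end_and_wait(&push);
}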
// Same as uvm_push_begin_on_channel except it also acquires the input tracker
// for the caller
#define uvm_push_begin_acquire_on_channel(channel, tracker, push, format, ...) \
View File
@@ -97,21 +97,14 @@
#define UVM_SUBCHANNEL_C56F UVM_SUBCHANNEL_HOST
#define UVM_SUBCHANNEL_C6B5 UVM_SUBCHANNEL_CE

#define UVM_SUBCHANNEL_C86F UVM_SUBCHANNEL_HOST
#define UVM_SUBCHANNEL_C8B5 UVM_SUBCHANNEL_CE

// Channel for UVM SW methods. This is defined in nv_uvm_types.h. RM does not
// care about the specific number as long as it's bigger than the largest HW
// value. For example, Kepler reserves subchannels 5-7 for software objects.
#define UVM_SUBCHANNEL_C076 UVM_SW_OBJ_SUBCHANNEL

#define UVM_METHOD_SIZE 4
#define UVM_METHOD_COUNT_MAX HWMASK(B06F, DMA, INCR_COUNT)

#if HWMASK(B06F, DMA, INCR_COUNT) != HWMASK(B06F, DMA, NONINCR_COUNT)
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -43,16 +43,9 @@ static NvU32 get_push_end_size(uvm_channel_t *channel)
if (uvm_channel_is_ce(channel))
return UVM_PUSH_CE_END_SIZE;

return 0;
}

static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
{
NV_STATUS status = NV_OK;
@@ -66,10 +59,6 @@ static NV_STATUS test_push_end_size(uvm_va_space_t *va_space)
NvU32 push_end_size;
uvm_channel_type_t type = i;

status = uvm_push_begin(gpu->channel_manager, type, &push, "type %u\n", (unsigned)type);
TEST_CHECK_GOTO(status == NV_OK, done);
@@ -302,7 +291,7 @@ static NV_STATUS test_push_interleaving_on_gpu(uvm_gpu_t* gpu)
// single thread.
uvm_thread_context_lock_disable_tracking();

-status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, &mem);
status = uvm_rm_mem_alloc_and_map_cpu(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &mem);
TEST_CHECK_GOTO(status == NV_OK, done);
host_va = (NvU32*)uvm_rm_mem_get_cpu_va(mem);
gpu_va = uvm_rm_mem_get_gpu_va(mem, gpu, uvm_channel_is_proxy(channel));
@@ -559,10 +548,6 @@ done:
static NV_STATUS test_max_pushes_on_gpu(uvm_gpu_t *gpu)
{
TEST_NV_CHECK_RET(test_max_pushes_on_gpu_and_channel_type(gpu, UVM_CHANNEL_TYPE_GPU_INTERNAL));
return NV_OK;
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -80,6 +80,7 @@ NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_push
NV_STATUS status;
int i;
uvm_gpu_t *gpu = channel_manager->gpu;
NvU64 pushbuffer_alignment;
uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));

if (pushbuffer == NULL)
@@ -96,15 +97,32 @@ NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_push
UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);
// The pushbuffer allocation is aligned to UVM_PUSHBUFFER_SIZE and its size
// (UVM_PUSHBUFFER_SIZE) is a power of 2. These constraints guarantee that
// the entire pushbuffer belongs to a 1TB (2^40) segment. Thus, we can set
// the Esched/PBDMA segment base for all channels during their
// initialization and it is immutable for the entire channels' lifetime.
BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));
if (gpu->uvm_test_force_upper_pushbuffer_segment)
pushbuffer_alignment = (1ull << 40);
else
pushbuffer_alignment = UVM_PUSHBUFFER_SIZE;
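As a standalone illustration of the comment above (not driver code): because UVM_PUSHBUFFER_SIZE is a power of two and the allocation is aligned to it, the entire pushbuffer sits inside a single 1TB (2^40) segment, so the Esched/PBDMA segment base can be programmed once per channel. The size below is an assumed example value, not the driver's constant.

#include <assert.h>
#include <stdint.h>

#define EXAMPLE_PUSHBUFFER_SIZE (32ull * 1024 * 1024)  /* assumed power-of-two size */
#define SEGMENT_SIZE            (1ull << 40)           /* 1TB Esched/PBDMA segment */

static void check_single_segment(uint64_t base)
{
    /* base is aligned to the pushbuffer size... */
    assert((base & (EXAMPLE_PUSHBUFFER_SIZE - 1)) == 0);
    /* ...so the first and last byte land in the same 1TB segment. */
    assert((base / SEGMENT_SIZE) == ((base + EXAMPLE_PUSHBUFFER_SIZE - 1) / SEGMENT_SIZE));
}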
status = uvm_rm_mem_alloc_and_map_cpu(gpu,
(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
UVM_RM_MEM_TYPE_SYS:
UVM_RM_MEM_TYPE_GPU,
UVM_PUSHBUFFER_SIZE,
pushbuffer_alignment,
&pushbuffer->memory);
if (status != NV_OK)
goto error;

// Verify the GPU can access the pushbuffer.
UVM_ASSERT(uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE < gpu->parent->max_host_va);

bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);
@@ -375,10 +393,14 @@ static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm
void uvm_pushbuffer_mark_completed(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
-uvm_pushbuffer_chunk_t *chunk = gpfifo_to_chunk(pushbuffer, gpfifo);
uvm_pushbuffer_chunk_t *chunk;
uvm_push_info_t *push_info = gpfifo->push_info;
bool need_to_update_chunk = false;
UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);
chunk = gpfifo_to_chunk(pushbuffer, gpfifo);
if (push_info->on_complete != NULL)
push_info->on_complete(push_info->on_complete_data);
@@ -486,3 +508,8 @@ void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
return uvm_pushbuffer_print_common(pushbuffer, NULL);
}
NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, pushbuffer->channel_manager->gpu);
}
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -236,4 +236,7 @@ bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer);
// Helper to print pushbuffer state for debugging
void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer);
// Helper to retrieve the pushbuffer->memory GPU VA.
NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer);
#endif // __UVM_PUSHBUFFER_H__
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -179,11 +179,16 @@ static void rm_mem_unmap_gpu_proxy(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
rm_mem_clear_gpu_proxy_va(rm_mem, gpu);
}

-NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out)
NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out)
{
NV_STATUS status = NV_OK;
uvm_rm_mem_t *rm_mem;
UvmGpuAllocInfo alloc_info = { 0 };
NvU64 gpu_va;

UVM_ASSERT(gpu);
@@ -194,12 +199,7 @@ NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size
if (rm_mem == NULL)
return NV_ERR_NO_MEMORY;

alloc_info.alignment = gpu_alignment;

if (type == UVM_RM_MEM_TYPE_SYS)
status = uvm_rm_locked_call(nvUvmInterfaceMemoryAllocSys(gpu->rm_address_space, size, &gpu_va, &alloc_info));
@@ -274,7 +274,7 @@ void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem)
rm_mem_clear_cpu_va(rm_mem);
}

-NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment)
{
NV_STATUS status;
uvm_gpu_t *gpu_owner;
@@ -296,6 +296,7 @@ NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu)
status = uvm_rm_locked_call(nvUvmInterfaceDupAllocation(gpu_owner->rm_address_space,
gpu_owner_va,
gpu->rm_address_space,
gpu_alignment,
&gpu_va));
if (status != NV_OK) {
UVM_ERR_PRINT("nvUvmInterfaceDupAllocation() failed: %s, src GPU %s, dest GPU %s\n",
@@ -374,12 +375,16 @@ void uvm_rm_mem_free(uvm_rm_mem_t *rm_mem)
uvm_kvfree(rm_mem);
}

-NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out)
NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out)
{
uvm_rm_mem_t *rm_mem;
NV_STATUS status;

-status = uvm_rm_mem_alloc(gpu, type, size, &rm_mem);
status = uvm_rm_mem_alloc(gpu, type, size, gpu_alignment, &rm_mem);
if (status != NV_OK)
return status;
@@ -396,32 +401,36 @@ error:
return status;
}

-NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem)
NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment)
{
uvm_gpu_t *gpu;

UVM_ASSERT(rm_mem);

for_each_global_gpu(gpu) {
-NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu);
NV_STATUS status = uvm_rm_mem_map_gpu(rm_mem, gpu, gpu_alignment);
if (status != NV_OK)
return status;
}
return NV_OK;
}

-NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out)
NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out)
{
uvm_rm_mem_t *rm_mem;
NV_STATUS status;

UVM_ASSERT(gpu);

-status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, &rm_mem);
status = uvm_rm_mem_alloc_and_map_cpu(gpu, type, size, gpu_alignment, &rm_mem);
if (status != NV_OK)
return status;

-status = uvm_rm_mem_map_all_gpus(rm_mem);
status = uvm_rm_mem_map_all_gpus(rm_mem, gpu_alignment);
if (status != NV_OK)
goto error;
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -71,11 +71,18 @@ struct uvm_rm_mem_struct
// mapping created and removed dynamically with the uvm_rm_mem_(un)map_gpu()
// functions.
//
// Alignment affects only the GPU VA mapping. If gpu_alignment is 0, then 4K
// alignment is enforced.
//
// Locking:
// - Internally acquires:
// - RM API lock
// - RM GPUs lock
-NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out);
NV_STATUS uvm_rm_mem_alloc(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out);
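A hypothetical call site for the updated allocator (the helper name is illustrative, not from this commit): allocate 64KB of sysmem for UVM-internal use with a 64KB GPU VA alignment; passing 0 would fall back to the default 4K alignment described above.

static NV_STATUS example_alloc_aligned_sysmem(uvm_gpu_t *gpu, uvm_rm_mem_t **rm_mem_out)
{
    // size = 64KB, gpu_alignment = 64KB
    return uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, 64 * 1024, 64 * 1024, rm_mem_out);
}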
// Free the memory.
// Clear all mappings and free the memory
@@ -89,16 +96,26 @@ NV_STATUS uvm_rm_mem_map_cpu(uvm_rm_mem_t *rm_mem);
void uvm_rm_mem_unmap_cpu(uvm_rm_mem_t *rm_mem);

// Shortcut for uvm_rm_mem_alloc() + uvm_rm_mem_map_cpu().
// The function fails and nothing is allocated if any of the intermediate steps
// fail.
//
// Locking same as uvm_rm_mem_alloc()
-NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out);
NV_STATUS uvm_rm_mem_alloc_and_map_cpu(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out);

// Shortcut for uvm_rm_mem_alloc_and_map_cpu() + uvm_rm_mem_map_all_gpus()
// The function fails and nothing is allocated if any of the intermediate steps
// fail.
//
// Locking same as uvm_rm_mem_alloc()
-NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, NvLength size, uvm_rm_mem_t **rm_mem_out);
NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu,
uvm_rm_mem_type_t type,
NvLength size,
NvU64 gpu_alignment,
uvm_rm_mem_t **rm_mem_out);

// Map/Unmap on UVM's internal address space of a GPU. In SR-IOV heavy the
// operation is also applied on the GPU's proxy address space.
@@ -107,7 +124,7 @@ NV_STATUS uvm_rm_mem_alloc_and_map_all(uvm_gpu_t *gpu, uvm_rm_mem_type_t type, N
// from the one the memory was originally allocated for.
//
// Locking same as uvm_rm_mem_alloc()
-NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu);
NV_STATUS uvm_rm_mem_map_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 gpu_alignment);
void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu);

// Map on UVM's internal address space of all GPUs retained by the UVM driver
@@ -115,7 +132,7 @@ void uvm_rm_mem_unmap_gpu(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu);
// also mapped on the proxy address space of all GPUs.
//
// Locking same as uvm_rm_mem_alloc()
-NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem);
NV_STATUS uvm_rm_mem_map_all_gpus(uvm_rm_mem_t *rm_mem, NvU64 gpu_alignment);

// Get the CPU VA, GPU VA (UVM internal/kernel address space), or GPU (proxy
// address space)
@@ -125,7 +142,9 @@ NvU64 uvm_rm_mem_get_gpu_proxy_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu);
// Get the GPU VA of the given memory in UVM's internal address space (if the
// flag is false), or proxy address space (if flag is true).
-NvU64 uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, bool is_proxy_va_space);
NvU64 uvm_rm_mem_get_gpu_va(uvm_rm_mem_t *rm_mem,
uvm_gpu_t *gpu,
bool is_proxy_va_space);

// Query if the memory is mapped on the CPU, GPU (UVM internal/kernel address
// space), or GPU (proxy address space)
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -40,6 +40,7 @@ static NV_STATUS map_cpu(uvm_rm_mem_t *rm_mem)
// Unmap
uvm_rm_mem_unmap_cpu(rm_mem);

// Unmapping already unmapped also OK
uvm_rm_mem_unmap_cpu(rm_mem);
@@ -59,9 +60,10 @@ static NV_STATUS map_cpu(uvm_rm_mem_t *rm_mem)
return NV_OK;
}

-static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem)
static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
{
uvm_gpu_t *gpu = rm_mem->gpu_owner;
NvU64 gpu_va;

// The memory should have been automatically mapped in the GPU owner
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
@@ -71,26 +73,31 @@ static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem)
// located in vidmem.
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));

gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));

// Explicitly mapping or unmapping to the GPU that owns the allocation is
// not allowed, so the testing related to GPU owners is simpler than that of
// other GPUs.
return NV_OK;
}
-static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space)
static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space, NvU64 alignment)
{
uvm_gpu_t *gpu_owner = rm_mem->gpu_owner;
uvm_gpu_t *gpu;
NvU64 gpu_va;

for_each_va_space_gpu(gpu, va_space) {
if (gpu == gpu_owner)
continue;
-TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu));
TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu, alignment));
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));

// Mappings are not ref counted, so additional map calls are no-ops
-TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu));
TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu, alignment));

// The previous GPU map calls added mappings to the proxy VA space
// when in SR-IOV heavy mode
@@ -107,10 +114,14 @@ static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space)
TEST_CHECK_RET(!uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));

// Subsequent mappings should behave as they did in the beginning.
-TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu));
TEST_NV_CHECK_RET(uvm_rm_mem_map_gpu(rm_mem, gpu, alignment));
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));

gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
}

return NV_OK;
@@ -124,33 +135,45 @@ static NV_STATUS test_all_gpus_in_va(uvm_va_space_t *va_space)
// Create allocations of these types
static const uvm_rm_mem_type_t mem_types[] = { UVM_RM_MEM_TYPE_SYS, UVM_RM_MEM_TYPE_GPU };
// Create allocations of these sizes
static const size_t sizes[] = { 1, 4, 16, 128, 1024, 4096, 1024 * 1024, 4 * 1024 * 1024 };
static const NvU64 alignments[] = { 0,
8,
UVM_PAGE_SIZE_4K >> 1,
UVM_PAGE_SIZE_4K,
UVM_PAGE_SIZE_4K << 1,
UVM_PAGE_SIZE_64K,
UVM_PAGE_SIZE_2M,
UVM_PAGE_SIZE_2M << 3,
UVM_PAGE_SIZE_2M << 5 };

uvm_assert_rwsem_locked(&va_space->lock);

TEST_CHECK_RET(!uvm_processor_mask_empty(&va_space->registered_gpus));

for_each_va_space_gpu(gpu, va_space) {
-int i, j;
int i, j, k;

for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
for (j = 0; j < ARRAY_SIZE(mem_types); ++j) {
for (k = 0; k < ARRAY_SIZE(alignments); ++k) {
// Create an allocation in the GPU's address space
-TEST_NV_CHECK_RET(uvm_rm_mem_alloc(gpu, mem_types[j], sizes[i], &rm_mem));
TEST_NV_CHECK_RET(uvm_rm_mem_alloc(gpu, mem_types[j], sizes[i], alignments[k], &rm_mem));

// Test CPU mappings
TEST_NV_CHECK_GOTO(map_cpu(rm_mem), error);

// Test mappings in the GPU owning the allocation
-TEST_NV_CHECK_GOTO(map_gpu_owner(rm_mem), error);
TEST_NV_CHECK_GOTO(map_gpu_owner(rm_mem, alignments[k]), error);

// For sysmem allocations, test mappings on all other GPUs
if (rm_mem->type == UVM_RM_MEM_TYPE_SYS)
-TEST_NV_CHECK_GOTO(map_other_gpus(rm_mem, va_space), error);
TEST_NV_CHECK_GOTO(map_other_gpus(rm_mem, va_space, alignments[k]), error);

uvm_rm_mem_free(rm_mem);
}
}
}
}
View File
@@ -327,9 +327,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_RANGE_INJECT_ADD_GPU_VA_SPACE_ERROR,
uvm_test_va_range_inject_add_gpu_va_space_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DESTROY_GPU_VA_SPACE_DELAY, uvm_test_destroy_gpu_va_space_delay);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
}
View File
@@ -188,7 +188,4 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *filp);

#endif
View File
@@ -1390,14 +1390,6 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_DESTROY_GPU_VA_SPACE_DELAY_PARAMS;

#define UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED UVM_TEST_IOCTL_BASE(96)
typedef struct
{
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -168,10 +168,9 @@ static NV_STATUS uvm_user_channel_create(uvm_va_space_t *va_space,
user_channel->tsg.valid = channel_info->bTsgChannel == NV_TRUE;
user_channel->tsg.id = channel_info->tsgId;
user_channel->tsg.max_subctx_count = channel_info->tsgMaxSubctxCount;
-user_channel->work_submission_token = channel_info->workSubmissionToken;
-user_channel->work_submission_offset = channel_info->workSubmissionOffset;
user_channel->clear_faulted_token = channel_info->clearFaultedToken;
user_channel->chram_channel_register = channel_info->pChramChannelRegister;
user_channel->runlist_pri_base_register = channel_info->pRunlistPRIBaseRegister;
user_channel->smc_engine_id = channel_info->smcEngineId;
user_channel->smc_engine_ve_id_offset = channel_info->smcEngineVeIdOffset;
View File
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -98,17 +98,11 @@ struct uvm_user_channel_struct
// If valid is true, tsg_id contains the ID of the TSG
NvU32 id;

// If valid is true, this is the maximum number of subcontexts in the
// TSG
NvU32 max_subctx_count;
} tsg;

-// This is the value that needs to be used when ringing the channel's
-// doorbell
-NvU32 work_submission_token;
-// This is the address of the channel's doorbell
-volatile NvU32 *work_submission_offset;

// On Turing+, the CLEAR_FAULTED method requires passing a RM-provided
// handle to identify the channel.
NvU32 clear_faulted_token;
@@ -117,9 +111,11 @@ struct uvm_user_channel_struct
// channel removal
uvm_tracker_t clear_faulted_tracker;

// Address of the NV_CHRAM_CHANNEL register and the runlist PRI base
// register. Only valid on GPUs with
// non_replayable_faults_supported && !has_clear_faulted_channel_method.
volatile NvU32 *chram_channel_register;
volatile NvU32 *runlist_pri_base_register;

// Id of the SMC engine this channel is bound to, or zero if the GPU
// does not support SMC or it is a CE channel
View File
@@ -40,9 +40,6 @@
#include "uvm_va_space_mm.h"
#include "uvm_test_ioctl.h"

typedef enum
{
BLOCK_PTE_OP_MAP,
@@ -584,7 +581,6 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
{
uvm_va_block_t *block = NULL;
NvU64 size = end - start + 1;
-NV_STATUS status;

UVM_ASSERT(PAGE_ALIGNED(start));
UVM_ASSERT(PAGE_ALIGNED(end + 1));
@@ -612,10 +608,8 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
block = nv_kmem_cache_zalloc(g_uvm_va_block_cache, NV_UVM_GFP_FLAGS);
}

-if (!block) {
-status = NV_ERR_NO_MEMORY;
-goto error;
-}
if (!block)
return NV_ERR_NO_MEMORY;

nv_kref_init(&block->kref);
uvm_mutex_init(&block->lock, UVM_LOCK_ORDER_VA_BLOCK);
@@ -628,10 +622,6 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
*out_block = block;

return NV_OK;

-error:
-uvm_va_block_release(block);
-return status;
}

static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t *gpu)
@@ -2584,10 +2574,6 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, contig_region_size);
}
@@ -6883,8 +6869,11 @@ static void block_destroy_gpu_state(uvm_va_block_t *block, uvm_gpu_id_t id)
// Unmap PTEs and free page tables
gpu = uvm_va_space_get_gpu(va_space, id);
gpu_va_space = uvm_gpu_va_space_get(va_space, gpu);
-if (gpu_va_space)
-uvm_va_block_remove_gpu_va_space(block, gpu_va_space, NULL);
if (gpu_va_space) {
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);

uvm_va_block_remove_gpu_va_space(block, gpu_va_space, block_context);
}

UVM_ASSERT(!uvm_processor_mask_test(&block->mapped, id));
@@ -6962,10 +6951,10 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
return block_pre_populate_pde1_gpu(va_block, gpu_va_space, NULL);
}

-void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_space_t *gpu_va_space, struct mm_struct *mm)
void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_context_t *block_context)
{
-uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
-uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, mm);
uvm_pte_batch_t *pte_batch = &block_context->mapping.pte_batch;
uvm_tlb_batch_t *tlb_batch = &block_context->mapping.tlb_batch;
uvm_gpu_t *gpu = gpu_va_space->gpu;
View File
@@ -440,11 +440,9 @@ struct uvm_va_block_struct
#if UVM_IS_CONFIG_HMM()
struct
{
// The MMU notifier is registered per va_block.
struct mmu_interval_notifier notifier;

// Parent VA space pointer. It is NULL for UVM managed blocks or if
// the HMM block is dead. This field can be read while holding the
// block lock and is only modified while holding the va_space write
@@ -551,6 +549,11 @@ static inline void uvm_va_block_retain(uvm_va_block_t *va_block)
nv_kref_get(&va_block->kref);
}

// Locking: The va_block lock must not be held.
// The va_space lock must be held in write mode unless it is the special case
// that the block has no GPU state; for example, right after calling
// uvm_va_block_create(). In that case, the va_space lock can be held in read
// mode.
static inline void uvm_va_block_release(uvm_va_block_t *va_block)
{
if (va_block) {
@@ -986,9 +989,11 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
// NULL, the caller must hold mm->mmap_lock in at least read mode.
-void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_space_t *gpu_va_space, struct mm_struct *mm);
void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_context_t *block_context);

// Creates any mappings necessary in this VA block between the two GPUs, in
// either direction.
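A hypothetical caller sketch for the reworked declaration above (the example_* name is not from the driver): the block context now comes from the VA space rather than being built from an mm inside the callee, and the block lock is held across the call as the LOCKING comment requires.

static void example_remove_gpu_va_space(uvm_va_space_t *va_space,
                                        uvm_va_block_t *va_block,
                                        uvm_gpu_va_space_t *gpu_va_space)
{
    // NULL mm: this path does not need to create CPU mappings.
    uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);

    uvm_mutex_lock(&va_block->lock);
    uvm_va_block_remove_gpu_va_space(va_block, gpu_va_space, block_context);
    uvm_mutex_unlock(&va_block->lock);
}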
View File
@@ -254,7 +254,6 @@ typedef struct
uvm_va_policy_t *policy;

#if UVM_IS_CONFIG_HMM()
struct
{
@@ -263,7 +262,6 @@ typedef struct
} hmm;
#endif

// Convenience buffer for page mask prints
char page_mask_string_buffer[UVM_PAGE_MASK_PRINT_MIN_BUFFER_SIZE];
} uvm_va_block_context_t;

View File

@ -172,6 +172,102 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block,
return uvm_va_policy_node_container(tree_node); return uvm_va_policy_node_container(tree_node);
} }
uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
NvU64 start,
NvU64 end,
uvm_va_policy_node_t **out_node,
uvm_va_block_region_t *out_region)
{
uvm_range_tree_node_t *tree_node;
uvm_va_policy_node_t *node;
uvm_va_policy_t *policy;
uvm_va_block_region_t region;
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(start >= va_block->start);
UVM_ASSERT(end <= va_block->end);
UVM_ASSERT(start < end);
region.first = uvm_va_block_cpu_page_index(va_block, start);
// Even if no policy is found, we return the default policy and loop
// one time.
tree_node = uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end);
if (tree_node) {
node = uvm_va_policy_node_container(tree_node);
if (node->node.start <= start) {
policy = &node->policy;
region.outer = uvm_va_block_cpu_page_index(va_block, min(end, node->node.end)) + 1;
}
else {
// This node starts after the requested start, so cover the leading gap
// with the default policy first; the node itself is returned by the next
// call to uvm_va_policy_iter_next().
policy = &uvm_va_policy_default;
region.outer = uvm_va_block_cpu_page_index(va_block, node->node.start - 1) + 1;
}
}
else {
node = NULL;
policy = &uvm_va_policy_default;
region.outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
}
*out_node = node;
*out_region = region;
return policy;
}
uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
NvU64 end,
uvm_va_policy_node_t **inout_node,
uvm_va_block_region_t *inout_region)
{
uvm_va_policy_node_t *node = *inout_node;
uvm_va_policy_node_t *next;
uvm_va_block_region_t region;
if (!node)
return NULL;
next = uvm_va_policy_node_iter_next(va_block, node, end);
if (policy == &uvm_va_policy_default) {
// We haven't used the current policy node yet so use it now.
next = node;
policy = &node->policy;
region = uvm_va_block_region_from_start_end(va_block,
node->node.start,
min(end, node->node.end));
}
else if (!next) {
if (node->node.end >= end)
return NULL;
policy = &uvm_va_policy_default;
region.first = inout_region->outer;
region.outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
}
else {
region.first = inout_region->outer;
if (next->node.start <= uvm_va_block_region_start(va_block, region)) {
policy = &next->policy;
region.outer = uvm_va_block_cpu_page_index(va_block, min(end, next->node.end)) + 1;
}
else {
// There is a gap between the previous node and 'next', so cover the gap
// with the default policy.
policy = &uvm_va_policy_default;
region.outer = uvm_va_block_cpu_page_index(va_block, next->node.start - 1) + 1;
}
}
*inout_node = next;
*inout_region = region;
return policy;
}
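// Worked example (illustrative, not taken from the change itself): for a
// va_block covering pages 0..15 where a single policy node spans pages 4..7,
// iterating over the whole block yields three (policy, region) pairs: the
// default policy for pages [0, 4), the node's policy for [4, 8), and the
// default policy again for [8, 16). Gaps in the policy tree are reported
// explicitly rather than skipped.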
NV_STATUS uvm_va_policy_node_split(uvm_va_block_t *va_block, NV_STATUS uvm_va_policy_node_split(uvm_va_block_t *va_block,
uvm_va_policy_node_t *old, uvm_va_policy_node_t *old,
NvU64 new_end, NvU64 new_end,

View File

@ -28,6 +28,7 @@
#include "uvm_forward_decl.h" #include "uvm_forward_decl.h"
#include "uvm_processors.h" #include "uvm_processors.h"
#include "uvm_range_tree.h" #include "uvm_range_tree.h"
#include "uvm_va_block_types.h"
// This enum must be kept in sync with UVM_TEST_READ_DUPLICATION_POLICY in // This enum must be kept in sync with UVM_TEST_READ_DUPLICATION_POLICY in
// uvm_test_ioctl.h // uvm_test_ioctl.h
@ -168,6 +169,31 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block, uvm
(node); \ (node); \
(node) = (next)) (node) = (next))
// Returns the policy that applies at 'start'. If no policy node covers
// 'start', the default policy is returned and *out_region covers the gap up
// to the next policy node or 'end', whichever comes first.
// Locking: The va_block lock must be held.
uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
NvU64 start,
NvU64 end,
uvm_va_policy_node_t **out_node,
uvm_va_block_region_t *out_region);
// Returns the next VA policy following the provided policy in address order,
// if that policy's start <= the provided end.
// Locking: The va_block lock must be held.
uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
NvU64 end,
uvm_va_policy_node_t **inout_node,
uvm_va_block_region_t *inout_region);
// Note that policy and region are set and usable in the loop body.
// The 'node' variable is used to retain loop state and 'policy' doesn't
// necessarily match &node->policy.
#define uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) \
for ((policy) = uvm_va_policy_iter_first((va_block), (start), (end), &(node), &(region)); \
(policy); \
(policy) = uvm_va_policy_iter_next((va_block), (policy), (end), &(node), &(region)))
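As a usage sketch of the macro above: only uvm_for_each_va_policy_in, uvm_va_policy_default, and the region fields come from the code in this change; the helper's name and purpose are invented for illustration.
// Counts how many pages in [start, end] are covered by an explicit policy
// node, as opposed to the default policy.
static size_t example_count_explicit_policy_pages(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_va_policy_t *policy;
    uvm_va_policy_node_t *node;
    uvm_va_block_region_t region;
    size_t pages = 0;

    uvm_assert_mutex_locked(&va_block->lock);

    uvm_for_each_va_policy_in(policy, va_block, start, end, node, region) {
        // 'region' covers exactly the pages this 'policy' applies to; gaps in
        // the policy tree show up as &uvm_va_policy_default.
        if (policy != &uvm_va_policy_default)
            pages += region.outer - region.first;
    }

    return pages;
}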
#else // UVM_IS_CONFIG_HMM() #else // UVM_IS_CONFIG_HMM()
static NV_STATUS uvm_va_policy_init(void) static NV_STATUS uvm_va_policy_init(void)

View File

@ -30,9 +30,6 @@
#include "uvm_kvmalloc.h" #include "uvm_kvmalloc.h"
#include "uvm_map_external.h" #include "uvm_map_external.h"
#include "uvm_perf_thrashing.h" #include "uvm_perf_thrashing.h"
#include "nv_uvm_interface.h" #include "nv_uvm_interface.h"
static struct kmem_cache *g_uvm_va_range_cache __read_mostly; static struct kmem_cache *g_uvm_va_range_cache __read_mostly;
@ -378,10 +375,8 @@ NV_STATUS uvm_va_range_create_semaphore_pool(uvm_va_space_t *va_space,
if (status != NV_OK) if (status != NV_OK)
goto error; goto error;
if (i == 0 && g_uvm_global.sev_enabled)
mem_alloc_params.dma_owner = gpu;
if (attrs.is_cacheable) { if (attrs.is_cacheable) {
// At most 1 GPU can have this memory cached, in which case it is // At most 1 GPU can have this memory cached, in which case it is
@ -702,7 +697,7 @@ static void va_range_remove_gpu_va_space_managed(uvm_va_range_t *va_range,
for_each_va_block_in_va_range(va_range, va_block) { for_each_va_block_in_va_range(va_range, va_block) {
uvm_mutex_lock(&va_block->lock); uvm_mutex_lock(&va_block->lock);
uvm_va_block_remove_gpu_va_space(va_block, gpu_va_space, mm);
uvm_va_block_remove_gpu_va_space(va_block, gpu_va_space, va_block_context);
uvm_mutex_unlock(&va_block->lock); uvm_mutex_unlock(&va_block->lock);
if (should_enable_read_duplicate) if (should_enable_read_duplicate)
@ -732,14 +727,7 @@ static void va_range_remove_gpu_va_space_semaphore_pool(uvm_va_range_t *va_range
{ {
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL); UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_SEMAPHORE_POOL);
uvm_mem_unmap_gpu_user(va_range->semaphore_pool.mem, gpu); uvm_mem_unmap_gpu_user(va_range->semaphore_pool.mem, gpu);
} }
void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range, void uvm_va_range_remove_gpu_va_space(uvm_va_range_t *va_range,
@ -1896,10 +1884,8 @@ NV_STATUS uvm_api_alloc_semaphore_pool(UVM_ALLOC_SEMAPHORE_POOL_PARAMS *params,
if (params->gpuAttributesCount > UVM_MAX_GPUS) if (params->gpuAttributesCount > UVM_MAX_GPUS)
return NV_ERR_INVALID_ARGUMENT; return NV_ERR_INVALID_ARGUMENT;
if (g_uvm_global.sev_enabled && params->gpuAttributesCount == 0)
return NV_ERR_INVALID_ARGUMENT;
// The mm needs to be locked in order to remove stale HMM va_blocks. // The mm needs to be locked in order to remove stale HMM va_blocks.
mm = uvm_va_space_mm_retain_lock(va_space); mm = uvm_va_space_mm_retain_lock(va_space);

View File

@ -365,13 +365,6 @@ static void unregister_gpu(uvm_va_space_t *va_space,
} }
} }
va_space_check_processors_masks(va_space); va_space_check_processors_masks(va_space);
} }
@ -751,17 +744,6 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
goto done; goto done;
} }
uvm_processor_mask_set(&va_space->registered_gpus, gpu->id); uvm_processor_mask_set(&va_space->registered_gpus, gpu->id);
va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu; va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
@ -774,9 +756,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
// All GPUs have native atomics on their own memory // All GPUs have native atomics on their own memory
processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id); processor_mask_array_set(va_space->has_native_atomics, gpu->id, gpu->id);
// TODO: Bug 3252572: Support the new link type UVM_GPU_LINK_C2C
if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_1) { if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_1) {
processor_mask_array_set(va_space->has_nvlink, gpu->id, UVM_ID_CPU); processor_mask_array_set(va_space->has_nvlink, gpu->id, UVM_ID_CPU);
processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id); processor_mask_array_set(va_space->has_nvlink, UVM_ID_CPU, gpu->id);
@ -796,17 +776,11 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
processor_mask_array_set(va_space->can_access, gpu->id, gpu->id); processor_mask_array_set(va_space->can_access, gpu->id, gpu->id);
processor_mask_array_set(va_space->accessible_from, gpu->id, gpu->id); processor_mask_array_set(va_space->accessible_from, gpu->id, gpu->id);
// All GPUs have direct access to sysmem, unless we're in SEV mode
if (!g_uvm_global.sev_enabled) {
processor_mask_array_set(va_space->can_access, gpu->id, UVM_ID_CPU);
processor_mask_array_set(va_space->accessible_from, UVM_ID_CPU, gpu->id);
}
// All GPUs have direct access to sysmem
processor_mask_array_set(va_space->can_access, gpu->id, UVM_ID_CPU);
processor_mask_array_set(va_space->accessible_from, UVM_ID_CPU, gpu->id);
processor_mask_array_set(va_space->can_copy_from, gpu->id, gpu->id); processor_mask_array_set(va_space->can_copy_from, gpu->id, gpu->id);
processor_mask_array_set(va_space->can_copy_from, gpu->id, UVM_ID_CPU); processor_mask_array_set(va_space->can_copy_from, gpu->id, UVM_ID_CPU);

View File

@ -44,9 +44,6 @@
#include "uvm_ats.h" #include "uvm_ats.h"
#include "uvm_va_space_mm.h" #include "uvm_va_space_mm.h"
// uvm_deferred_free_object provides a mechanism for building and later freeing // uvm_deferred_free_object provides a mechanism for building and later freeing
// a list of objects which are owned by a VA space, but can't be freed while the // a list of objects which are owned by a VA space, but can't be freed while the
// VA space lock is held. // VA space lock is held.
@ -280,13 +277,6 @@ struct uvm_va_space_struct
// stored in the VA space to avoid taking the global lock. // stored in the VA space to avoid taking the global lock.
uvm_cpu_gpu_affinity_t gpu_cpu_numa_affinity[UVM_ID_MAX_GPUS]; uvm_cpu_gpu_affinity_t gpu_cpu_numa_affinity[UVM_ID_MAX_GPUS];
// Array of GPU VA spaces // Array of GPU VA spaces
uvm_gpu_va_space_t *gpu_va_spaces[UVM_ID_MAX_GPUS]; uvm_gpu_va_space_t *gpu_va_spaces[UVM_ID_MAX_GPUS];

View File

@ -806,13 +806,14 @@ NV_STATUS NV_API_CALL nv_dma_map_peer
( (
nv_dma_device_t *dma_dev, nv_dma_device_t *dma_dev,
nv_dma_device_t *peer_dma_dev, nv_dma_device_t *peer_dma_dev,
NvU8 bar_index,
NvU8 nv_bar_index,
NvU64 page_count, NvU64 page_count,
NvU64 *va NvU64 *va
) )
{ {
struct pci_dev *peer_pci_dev = to_pci_dev(peer_dma_dev->dev); struct pci_dev *peer_pci_dev = to_pci_dev(peer_dma_dev->dev);
struct resource *res; struct resource *res;
NvU8 bar_index;
NV_STATUS status; NV_STATUS status;
if (peer_pci_dev == NULL) if (peer_pci_dev == NULL)
@ -822,7 +823,7 @@ NV_STATUS NV_API_CALL nv_dma_map_peer
return NV_ERR_INVALID_REQUEST; return NV_ERR_INVALID_REQUEST;
} }
BUG_ON(bar_index >= NV_GPU_NUM_BARS);
bar_index = nv_bar_index_to_os_bar_index(peer_pci_dev, nv_bar_index);
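// The nv_bar_index -> OS bar_index translation is likely needed because
// 64-bit BARs occupy two consecutive pci_dev->resource[] slots, so the
// driver's logical BAR numbering does not map 1:1 onto the OS resource
// index (assumption; the rationale is not stated in the change).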
res = &peer_pci_dev->resource[bar_index]; res = &peer_pci_dev->resource[bar_index];
if (res->start == 0) if (res->start == 0)
{ {
@ -1089,187 +1090,6 @@ void NV_API_CALL nv_dma_release_sgt
#endif /* NV_LINUX_DMA_BUF_H_PRESENT && NV_DRM_AVAILABLE && NV_DRM_DRM_GEM_H_PRESENT */ #endif /* NV_LINUX_DMA_BUF_H_PRESENT && NV_DRM_AVAILABLE && NV_DRM_DRM_GEM_H_PRESENT */
#if defined(NV_LINUX_DMA_BUF_H_PRESENT) #if defined(NV_LINUX_DMA_BUF_H_PRESENT)
#endif /* NV_LINUX_DMA_BUF_H_PRESENT */ #endif /* NV_LINUX_DMA_BUF_H_PRESENT */
#ifndef IMPORT_DMABUF_FUNCTIONS_DEFINED #ifndef IMPORT_DMABUF_FUNCTIONS_DEFINED

View File

@ -23,8 +23,6 @@
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include "nv-dmabuf.h" #include "nv-dmabuf.h"
#if defined(CONFIG_DMA_SHARED_BUFFER) #if defined(CONFIG_DMA_SHARED_BUFFER)
typedef struct nv_dma_buf_mem_handle typedef struct nv_dma_buf_mem_handle
{ {
@ -796,7 +794,6 @@ nv_dma_buf_reuse(
return NV_ERR_OPERATING_SYSTEM; return NV_ERR_OPERATING_SYSTEM;
} }
if (buf->ops != &nv_dma_buf_ops) if (buf->ops != &nv_dma_buf_ops)
{ {
nv_printf(NV_DBG_ERRORS, "NVRM: Invalid dma-buf fd\n"); nv_printf(NV_DBG_ERRORS, "NVRM: Invalid dma-buf fd\n");
@ -804,7 +801,6 @@ nv_dma_buf_reuse(
goto cleanup_dmabuf; goto cleanup_dmabuf;
} }
priv = buf->priv; priv = buf->priv;
if (priv == NULL) if (priv == NULL)
@ -820,13 +816,8 @@ nv_dma_buf_reuse(
goto cleanup_dmabuf; goto cleanup_dmabuf;
} }
if (params->index > (priv->total_objects - params->numObjects)) if (params->index > (priv->total_objects - params->numObjects))
{ {
status = NV_ERR_INVALID_ARGUMENT; status = NV_ERR_INVALID_ARGUMENT;
goto unlock_priv; goto unlock_priv;
} }
@ -900,15 +891,3 @@ nv_dma_buf_export(
#endif // CONFIG_DMA_SHARED_BUFFER #endif // CONFIG_DMA_SHARED_BUFFER
} }

View File

@ -27,11 +27,7 @@
#include "nv-frontend.h" #include "nv-frontend.h"
#if defined(MODULE_LICENSE) #if defined(MODULE_LICENSE)
MODULE_LICENSE("Dual MIT/GPL"); MODULE_LICENSE("Dual MIT/GPL");
#endif #endif
#if defined(MODULE_INFO) #if defined(MODULE_INFO)
MODULE_INFO(supported, "external"); MODULE_INFO(supported, "external");
@ -50,14 +46,12 @@ MODULE_ALIAS_CHARDEV_MAJOR(NV_MAJOR_DEVICE_NUMBER);
*/ */
#if defined(MODULE_IMPORT_NS) #if defined(MODULE_IMPORT_NS)
/* /*
* DMA_BUF namespace is added by commit id 16b0314aa746 * DMA_BUF namespace is added by commit id 16b0314aa746
* ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16 * ("dma-buf: move dma-buf symbols into the DMA_BUF module namespace") in 5.16
*/ */
MODULE_IMPORT_NS(DMA_BUF); MODULE_IMPORT_NS(DMA_BUF);
#endif #endif
static NvU32 nv_num_instances; static NvU32 nv_num_instances;

View File

@ -140,8 +140,9 @@ static int nv_i2c_algo_smbus_xfer(
case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_WORD_DATA:
if (read_write != I2C_SMBUS_READ) if (read_write != I2C_SMBUS_READ)
{ {
data->block[1] = (data->word & 0xff);
data->block[2] = (data->word >> 8);
u16 word = data->word;
data->block[1] = (word & 0xff);
data->block[2] = (word >> 8);
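// Copying data->word into a local first matters because 'word' and 'block[]'
// overlap in union i2c_smbus_data: writing block[1] would otherwise clobber
// part of 'word' before the second read (based on the union layout in
// <linux/i2c.h>).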
} }
rmStatus = rm_i2c_transfer(sp, nv, (void *)adapter, rmStatus = rm_i2c_transfer(sp, nv, (void *)adapter,
@ -273,246 +274,6 @@ void NV_API_CALL nv_i2c_del_adapter(nv_state_t *nv, void *data)
} }
} }
#else // defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) #else // defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
void NV_API_CALL nv_i2c_del_adapter(nv_state_t *nv, void *data) void NV_API_CALL nv_i2c_del_adapter(nv_state_t *nv, void *data)
@ -524,29 +285,4 @@ void* NV_API_CALL nv_i2c_add_adapter(nv_state_t *nv, NvU32 port)
return NULL; return NULL;
} }
#endif // defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) #endif // defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE)
