535.54.03

Andy Ritger 2023-06-14 12:37:59 -07:00
parent eb5c7665a1
commit 26458140be
No known key found for this signature in database
GPG Key ID: 6D466BB75E006CFC
120 changed files with 83370 additions and 81507 deletions

View File

@ -2,6 +2,8 @@
## Release 535 Entries
### [535.54.03] 2023-06-14
### [535.43.02] 2023-05-30
#### Fixed

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.43.02.
version 535.54.03.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.43.02 driver release. This can be achieved by installing
535.54.03 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.43.02 release,
(see the table below). However, in the 535.54.03 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.02/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.54.03/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -664,6 +664,7 @@ Subsystem Device ID.
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.02\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.54.03\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

View File

@ -510,6 +510,12 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};
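
As a hedged illustration (not part of this commit), a file-release path could use the new flag to skip RMAPI teardown entirely when nothing was ever allocated on the fd; the helper name below is hypothetical:

    static void example_nvfp_release(nv_file_private_t *nvfp)
    {
        // Only run RMAPI cleanup if a client or an event was ever
        // allocated on this fd (see bCleanupRmapi above).
        if (nvfp->bCleanupRmapi)
            hypothetical_rmapi_cleanup(nvfp); // hypothetical helper
    }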
// Forward define the gpu ops structures
@ -959,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);

View File

@ -1455,12 +1455,12 @@ NV_STATUS nvUvmInterfacePagingChannelPushStream(UvmGpuPagingChannelHandle channe
concurrently with the same UvmCslContext parameter in different threads. The caller must
guarantee this exclusion.
* nvUvmInterfaceCslLogDeviceEncryption
* nvUvmInterfaceCslRotateIv
* nvUvmInterfaceCslEncrypt
* nvUvmInterfaceCslDecrypt
* nvUvmInterfaceCslSign
* nvUvmInterfaceCslQueryMessagePool
* nvUvmInterfaceCslIncrementIv
*/
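
A minimal sketch of the caller-side exclusion required above, modeled on the uvm_conf_computing.c hunks later in this commit, which serialize CSL calls with a per-channel lock:

    // Inside a function that owns 'channel' (uvm_channel_t *) and 'iv' (UvmCslIv *):
    NV_STATUS status;

    uvm_mutex_lock(&channel->csl.ctx_lock);
    status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx,
                                          UVM_CSL_OPERATION_DECRYPT,
                                          1,
                                          iv);
    uvm_mutex_unlock(&channel->csl.ctx_lock);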
/*******************************************************************************
@ -1495,62 +1495,17 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
*/
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption
Returns an IV that can be later used in the nvUvmInterfaceCslEncrypt
method. The IV contains a "freshness bit" which value is set by this method
and subsequently dirtied by nvUvmInterfaceCslEncrypt to prevent
non-malicious reuse of the IV.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
encryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslEncrypt.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - New IV would cause a counter to overflow.
*/
NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv);
/*******************************************************************************
nvUvmInterfaceCslLogDeviceEncryption
Logs and checks information about device encryption.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
decryptIv[OUT] - Parameter that is stored before a successful
device encryption. It is used as an input to
nvUvmInterfaceCslDecrypt.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv);
/*******************************************************************************
nvUvmInterfaceCslRotateIv
Rotates the IV for a given channel and direction.
Rotates the IV for a given channel and operation.
This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with direction equal to
UVM_CSL_DIR_GPU_TO_CPU. Similiarly, outstanding messages that have been
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with direction equal to UVM_CSL_DIR_CPU_TO_GPU. For a given direction
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
@ -1559,17 +1514,17 @@ NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either
- UVM_CSL_DIR_CPU_TO_GPU
- UVM_CSL_DIR_GPU_TO_CPU
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The rotate operation would cause a counter
to overflow.
NV_ERR_INVALID_ARGUMENT - Invalid value for direction.
NV_ERR_INVALID_ARGUMENT - Invalid value for operation.
*/
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction);
UvmCslOperation operation);
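
A short usage sketch, assuming uvmCslContext is an initialized context and the caller already provides the exclusion described above; the overflow handling is illustrative only:

    NV_STATUS status = nvUvmInterfaceCslRotateIv(uvmCslContext,
                                                 UVM_CSL_OPERATION_DECRYPT);
    if (status == NV_ERR_INSUFFICIENT_RESOURCES) {
        // Rotating would overflow a message counter; handling is
        // caller-specific (sketch only).
    }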
/*******************************************************************************
nvUvmInterfaceCslEncrypt
@ -1580,7 +1535,7 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
this function produces undefined behavior. Performance is typically
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
The encryptIV can be obtained from nvUvmInterfaceCslAcquireEncryptionIv.
The encryptIV can be obtained from nvUvmInterfaceCslIncrementIv.
However, it is optional. If it is NULL, the next IV in line will be used.
See "CSL Interface and Locking" for locking requirements.
@ -1623,12 +1578,18 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[IN] - Size of the input and output buffers in
units of bytes. Value can range from 1 byte
to (2^32) - 1 bytes.
decryptIv[IN] - Parameter given by nvUvmInterfaceCslLogDeviceEncryption.
bufferSize[IN] - Size of the input and output buffers in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
calculate the authentication tag. Can be NULL.
addAuthDataSize[IN] - Size of the additional authenticated data in units of bytes.
Value can range from 1 byte to (2^32) - 1 bytes.
This parameter is ignored if addAuthData is NULL.
authTagBuffer[IN] - Address of authentication tag buffer.
Its size is UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES.
@ -1643,6 +1604,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
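
For reference, the uvm_conf_computing.c hunk later in this commit passes NULL and 0 for the new additional authenticated data parameters; a sketch of such a call, with the channel and buffer names assumed:

    status = nvUvmInterfaceCslDecrypt(&channel->csl.ctx,
                                      size,
                                      (const NvU8 *)src_cipher,
                                      src_iv,
                                      (NvU8 *)dst_plain,
                                      NULL, // addAuthData: none
                                      0,    // addAuthDataSize: ignored
                                      (const NvU8 *)auth_tag_buffer);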
/*******************************************************************************
@ -1673,7 +1636,6 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer);
/*******************************************************************************
nvUvmInterfaceCslQueryMessagePool
@ -1684,14 +1646,45 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
direction[IN] - Either UVM_CSL_DIR_CPU_TO_GPU or UVM_CSL_DIR_GPU_TO_CPU.
operation[IN] - Either UVM_CSL_OPERATION_ENCRYPT or UVM_CSL_OPERATION_DECRYPT.
messageNum[OUT] - Number of messages left before overflow.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the direction parameter is illegal.
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
*/
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum);
/*******************************************************************************
nvUvmInterfaceCslIncrementIv
Increments the message counter by the specified amount.
If iv is non-NULL then the incremented value is returned.
If operation is UVM_CSL_OPERATION_ENCRYPT then the returned IV's "freshness" bit is set and
can be used in nvUvmInterfaceCslEncrypt. If operation is UVM_CSL_OPERATION_DECRYPT then
the returned IV can be used in nvUvmInterfaceCslDecrypt.
See "CSL Interface and Locking" for locking requirements.
This function does not perform dynamic memory allocation.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
operation[IN] - Either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
increment[IN] - The amount by which the IV is incremented. Can be 0.
iv[OUT] - If non-NULL, a buffer to store the incremented IV.
Error codes:
NV_ERR_INVALID_ARGUMENT - The value of the operation parameter is illegal.
NV_ERR_INSUFFICIENT_RESOURCES - Incrementing the message counter would result
in an overflow.
*/
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv);
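
The uvm_conf_computing.c changes in this commit call this with an increment of 1 to reserve the next IV; a hedged encrypt-side sketch, assuming uvmCslContext is an initialized context:

    UvmCslIv encryptIv;
    NV_STATUS status = nvUvmInterfaceCslIncrementIv(uvmCslContext,
                                                    UVM_CSL_OPERATION_ENCRYPT,
                                                    1,
                                                    &encryptIv);
    // On NV_OK the returned IV has its "freshness" bit set and may be
    // passed as the optional encryptIv argument of nvUvmInterfaceCslEncrypt.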
#endif // _NV_UVM_INTERFACE_H_

View File

@ -286,6 +286,7 @@ typedef struct UvmGpuChannelInfo_tag
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@ -1060,10 +1061,10 @@ typedef struct UvmCslIv
NvU8 fresh;
} UvmCslIv;
typedef enum UvmCslDirection
typedef enum UvmCslOperation
{
UVM_CSL_DIR_CPU_TO_GPU,
UVM_CSL_DIR_GPU_TO_CPU
} UvmCslDirection;
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
#endif // _NV_UVM_TYPES_H_

View File

@ -103,13 +103,12 @@ NV_STATUS NV_API_CALL rm_gpu_ops_paging_channel_push_stream(nvidia_stack_t *, n
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_init(nvidia_stack_t *, struct ccslContext_t **, nvgpuChannelHandle_t);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *, struct ccslContext_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_acquire_encryption_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_encrypt_with_iv(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8*, NvU8 *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 const *, NvU8 *, NvU8 const *, NvU32, NvU8 const *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_sign(nvidia_stack_t *, struct ccslContext_t *, NvU32, NvU8 const *, NvU8 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64 *);
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *, struct ccslContext_t *, NvU8, NvU64, NvU8 *);
#endif

View File

@ -919,6 +919,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_MIGRATION_OPS_PRESENT" "" "types"
;;
vfio_precopy_info)
#
# Determine if vfio_precopy_info struct is present or not
#
# Added by commit 4db52602a6074 ("vfio: Extend the device migration
# protocol with PRE_COPY") in v6.2
#
CODE="
#include <linux/vfio.h>
struct vfio_precopy_info precopy_info;
"
compile_check_conftest "$CODE" "NV_VFIO_PRECOPY_INFO_PRESENT" "" "types"
;;
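
Presumably the generated NV_VFIO_PRECOPY_INFO_PRESENT define is then consumed elsewhere in the kernel interface layer (not shown in this commit) along these lines:

    #if defined(NV_VFIO_PRECOPY_INFO_PRESENT)
        // Kernel provides struct vfio_precopy_info (v6.2+): PRE_COPY
        // migration paths can reference it.
    #else
        // Older kernel: avoid referencing struct vfio_precopy_info.
    #endif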
vfio_log_ops)
#
# Determine if vfio_log_ops struct is present or not

View File

@ -179,6 +179,7 @@ static inline int nv_drm_gem_handle_create(struct drm_file *filp,
return drm_gem_handle_create(filp, &nv_gem->base, handle);
}
#if defined(NV_DRM_FENCE_AVAILABLE)
static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem)
{
#if defined(NV_DRM_GEM_OBJECT_HAS_RESV)
@ -187,6 +188,7 @@ static inline nv_dma_resv_t *nv_drm_gem_res_obj(struct nv_drm_gem_object *nv_gem
return nv_gem->base.dma_buf ? nv_gem->base.dma_buf->resv : &nv_gem->resv;
#endif
}
#endif
void nv_drm_gem_object_init(struct nv_drm_device *nv_dev,
struct nv_drm_gem_object *nv_gem,

View File

@ -338,11 +338,6 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
return NV_OK;
}
if (!gpu->parent->ce_hal->memcopy_is_valid(&push, dst, src)) {
TEST_NV_CHECK_RET(uvm_push_end_and_wait(&push));
return NV_OK;
}
// The input virtual addresses exist in UVM's internal address space, not
// the proxy address space
if (uvm_channel_is_proxy(push.channel)) {
@ -401,7 +396,7 @@ static NV_STATUS test_memcpy_and_memset_inner(uvm_gpu_t *gpu,
static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;
bool is_proxy_va_space;
bool is_proxy_va_space = false;
uvm_gpu_address_t gpu_verif_addr;
void *cpu_verif_addr;
uvm_mem_t *verif_mem = NULL;
@ -437,6 +432,34 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
}
}
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[0] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
if (uvm_conf_computing_mode_enabled(gpu)) {
for (i = 0; i < iterations; ++i) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[0],
gpu_addresses[0],
size,
element_sizes[s],
gpu_verif_addr,
cpu_verif_addr,
i),
done);
}
}
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the previous cases are valid.
// TODO: Bug 3839176: the test partially waived on Confidential
// Computing because it assumes that GPU can access system memory
// without using encryption.
goto done;
}
// Using a page size equal to the allocation size ensures that the UVM
// memories about to be allocated are physically contiguous. And since the
// size is a valid GPU page size, the memories can be virtually mapped on
@ -448,37 +471,22 @@ static NV_STATUS test_memcpy_and_memset(uvm_gpu_t *gpu)
// Physical address in sysmem
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &sys_uvm_mem), done);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_phys(sys_uvm_mem, gpu), done);
gpu_addresses[0] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
gpu_addresses[1] = uvm_mem_gpu_address_physical(sys_uvm_mem, gpu, 0, size);
// Physical address in vidmem
mem_params.backing_gpu = gpu;
TEST_NV_CHECK_GOTO(uvm_mem_alloc(&mem_params, &gpu_uvm_mem), done);
gpu_addresses[1] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
gpu_addresses[2] = uvm_mem_gpu_address_physical(gpu_uvm_mem, gpu, 0, size);
// Virtual address (in UVM's internal address space) backed by vidmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_GPU, size, 0, &gpu_rm_mem), done);
is_proxy_va_space = false;
gpu_addresses[2] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(gpu_rm_mem, gpu, is_proxy_va_space);
// Virtual address (in UVM's internal address space) backed by sysmem
TEST_NV_CHECK_GOTO(uvm_rm_mem_alloc(gpu, UVM_RM_MEM_TYPE_SYS, size, 0, &sys_rm_mem), done);
gpu_addresses[3] = uvm_rm_mem_get_gpu_va(sys_rm_mem, gpu, is_proxy_va_space);
for (i = 0; i < iterations; ++i) {
for (j = 0; j < ARRAY_SIZE(gpu_addresses); ++j) {
for (k = 0; k < ARRAY_SIZE(gpu_addresses); ++k) {
for (s = 0; s < ARRAY_SIZE(element_sizes); s++) {
// Because gpu_verif_addr is in sysmem, when the Confidential
// Computing feature is enabled, only the following cases are
// valid.
//
// TODO: Bug 3839176: the test partially waived on
// Confidential Computing because it assumes that GPU can
// access system memory without using encryption.
if (uvm_conf_computing_mode_enabled(gpu) &&
!(gpu_addresses[k].is_unprotected && gpu_addresses[j].is_unprotected)) {
continue;
}
TEST_NV_CHECK_GOTO(test_memcpy_and_memset_inner(gpu,
gpu_addresses[k],
gpu_addresses[j],

View File

@ -750,9 +750,9 @@ static void internal_channel_submit_work_wlc(uvm_push_t *push)
wlc_channel->channel_info.workSubmissionToken);
}
static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
static void internal_channel_submit_work_indirect_wlc(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
@ -765,10 +765,211 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_enc_auth_tag;
uvm_gpu_address_t push_enc_auth_tag_gpu;
NvU64 gpfifo_gpu = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
NvU64 gpfifo_gpu_va = push->channel->channel_info.gpFifoGpuVa + old_cpu_put * sizeof(gpfifo_entry);
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_wlc(push->launch_channel));
// WLC submissions are done under channel lock, so there should be no
// contention to get the right submission order.
UVM_ASSERT(push->channel->conf_computing.gpu_put == old_cpu_put);
// This can never stall or return an error. WLC launch after the WLC channels
// are initialized uses private static PB space; it neither needs the general
// PB space nor counts towards the maximum number of concurrent pushes.
status = uvm_push_begin_on_reserved_channel(push->launch_channel,
&indirect_push,
"Worklaunch to '%s' via '%s'",
push->channel->name,
push->launch_channel->name);
UVM_ASSERT(status == NV_OK);
// Move over the pushbuffer data
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);
uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);
gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
gpfifo_entry,
sizeof(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
push->channel->channel_info.workSubmissionToken);
// Ignore return value of push_wait. It can only fail with channel error
// which will be detected when waiting for the primary push.
(void)uvm_push_end_and_wait(&indirect_push);
push->channel->conf_computing.gpu_put = new_gpu_put;
}
static void update_gpput_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 new_gpu_put)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpput_auth_tag_cpu, *gpput_enc_cpu;
uvm_gpu_address_t gpput_auth_tag_gpu, gpput_enc_gpu;
NvU32 gpput_scratchpad[UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT/sizeof(new_gpu_put)];
UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));
gpput_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpput_enc_gpu);
gpput_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpput_auth_tag_gpu);
// Update GPPUT. The update needs a 4B write to a specific offset, but
// SEC2 can only perform 16B-aligned decrypt writes.
// A poison value is written to all other locations; it is ignored in
// most locations and overwritten by HW for the GPGET location.
memset(gpput_scratchpad, 0, sizeof(gpput_scratchpad));
UVM_ASSERT(sizeof(*gpput_scratchpad) == sizeof(new_gpu_put));
gpput_scratchpad[(channel->channel_info.gpPutGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;
// Set the value of GPGET to be the same as GPPUT. It will be overwritten
// by HW the next time the GET value changes. UVM never reads GPGET.
// However, RM does read it when freeing a channel. When this function
// is called from 'channel_manager_stop_wlc' we set the value of GPGET
// to the same value as GPPUT; a mismatch between these two values makes
// RM wait for any "pending" tasks, leading to significant delays in the
// channel teardown sequence.
UVM_ASSERT(channel->channel_info.gpPutGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT ==
channel->channel_info.gpGetGpuVa / UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
gpput_scratchpad[(channel->channel_info.gpGetGpuVa % UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT) /
sizeof(*gpput_scratchpad)] = new_gpu_put;
uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpput_enc_cpu,
gpput_scratchpad,
NULL,
sizeof(gpput_scratchpad),
gpput_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(channel->channel_info.gpPutGpuVa,
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpput_enc_gpu.address,
sizeof(gpput_scratchpad),
gpput_auth_tag_gpu.address);
}
static void set_gpfifo_via_sec2(uvm_push_t *sec2_push, uvm_channel_t *channel, NvU32 put, NvU64 value)
{
uvm_gpu_t *gpu = uvm_push_get_gpu(sec2_push);
void *gpfifo_auth_tag_cpu, *gpfifo_enc_cpu;
uvm_gpu_address_t gpfifo_auth_tag_gpu, gpfifo_enc_gpu;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + put * sizeof(value);
NvU64 gpfifo_scratchpad[2];
UVM_ASSERT(uvm_channel_is_sec2(sec2_push->channel));
gpfifo_enc_cpu = uvm_push_get_single_inline_buffer(sec2_push,
sizeof(gpfifo_scratchpad),
UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT,
&gpfifo_enc_gpu);
gpfifo_auth_tag_cpu = uvm_push_get_single_inline_buffer(sec2_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&gpfifo_auth_tag_gpu);
if (IS_ALIGNED(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT)) {
gpfifo_scratchpad[0] = value;
// Set the value of the odd entry to noop.
// It will be overwritten when the next entry is submitted.
gpu->parent->host_hal->set_gpfifo_noop(&gpfifo_scratchpad[1]);
}
else {
uvm_gpfifo_entry_t *previous_gpfifo;
UVM_ASSERT(put > 0);
previous_gpfifo = &channel->gpfifo_entries[put - 1];
if (previous_gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
gpfifo_scratchpad[0] = previous_gpfifo->control_value;
}
else {
uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
NvU64 prev_pb_va = uvm_pushbuffer_get_gpu_va_base(pushbuffer) + previous_gpfifo->pushbuffer_offset;
// Reconstruct the previous gpfifo entry. UVM_GPFIFO_SYNC_WAIT is
// used only in the static WLC schedule.
// Overwriting the previous entry with the same value doesn't hurt,
// whether the previous entry has been processed or not.
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_scratchpad[0],
prev_pb_va,
previous_gpfifo->pushbuffer_size,
UVM_GPFIFO_SYNC_PROCEED);
}
gpfifo_scratchpad[1] = value;
}
uvm_conf_computing_cpu_encrypt(sec2_push->channel,
gpfifo_enc_cpu,
gpfifo_scratchpad,
NULL,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_cpu);
gpu->parent->sec2_hal->decrypt(sec2_push,
UVM_ALIGN_DOWN(gpfifo_gpu, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT),
gpfifo_enc_gpu.address,
sizeof(gpfifo_scratchpad),
gpfifo_auth_tag_gpu.address);
}
static NV_STATUS internal_channel_submit_work_indirect_sec2(uvm_push_t *push,
NvU32 old_cpu_put,
NvU32 new_gpu_put)
{
uvm_pushbuffer_t *pushbuffer = push->channel->pool->manager->pushbuffer;
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
uvm_push_t indirect_push;
NV_STATUS status;
NvU64 gpfifo_entry;
void *push_enc_cpu = uvm_pushbuffer_get_unprotected_cpu_va_for_push(pushbuffer, push);
NvU64 push_enc_gpu = uvm_pushbuffer_get_unprotected_gpu_va_for_push(pushbuffer, push);
void *push_auth_tag_cpu;
uvm_gpu_address_t push_auth_tag_gpu;
uvm_spin_loop_t spin;
UVM_ASSERT(!uvm_channel_is_sec2(push->channel));
UVM_ASSERT(uvm_channel_is_sec2(push->launch_channel));
// If the old_cpu_put is not equal to the last gpu put, other pushes are
// pending that need to be submitted. That push/es' submission will update
@ -790,60 +991,36 @@ static NV_STATUS internal_channel_submit_work_indirect(uvm_push_t *push,
// Move over the pushbuffer data
if (uvm_channel_is_sec2(indirect_push.channel)) {
push_enc_auth_tag = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_enc_auth_tag_gpu);
}
else {
// Auth tags cannot be in protected vidmem.
// WLC channels use a static preallocated space for launch auth tags
push_enc_auth_tag = indirect_push.channel->conf_computing.launch_auth_tag_cpu;
push_enc_auth_tag_gpu = uvm_gpu_address_virtual(indirect_push.channel->conf_computing.launch_auth_tag_gpu_va);
}
push_auth_tag_cpu = uvm_push_get_single_inline_buffer(&indirect_push,
UVM_CONF_COMPUTING_AUTH_TAG_SIZE,
UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT,
&push_auth_tag_gpu);
uvm_conf_computing_cpu_encrypt(indirect_push.channel,
push_enc_cpu,
push->begin,
NULL,
uvm_push_get_size(push),
push_enc_auth_tag);
push_auth_tag_cpu);
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
if (uvm_channel_is_sec2(indirect_push.channel)) {
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_enc_auth_tag_gpu.address);
}
else {
gpu->parent->ce_hal->decrypt(&indirect_push,
uvm_gpu_address_virtual(uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push)),
uvm_gpu_address_virtual(push_enc_gpu),
uvm_push_get_size(push),
push_enc_auth_tag_gpu);
}
gpu->parent->sec2_hal->decrypt(&indirect_push,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
push_enc_gpu,
uvm_push_get_size(push),
push_auth_tag_gpu.address);
gpu->parent->host_hal->set_gpfifo_entry(&gpfifo_entry,
uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push),
uvm_push_get_size(push),
UVM_GPFIFO_SYNC_PROCEED);
// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// We could use a single 64 bit release if it were available
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(gpfifo_entry));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, push->channel->channel_info.gpPutGpuVa, new_gpu_put);
set_gpfifo_via_sec2(&indirect_push, push->channel, old_cpu_put, gpfifo_entry);
update_gpput_via_sec2(&indirect_push, push->channel, new_gpu_put);
// Ring the doorbell
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push,
push->channel->channel_info.workSubmissionOffsetGpuVa,
@ -930,11 +1107,7 @@ void uvm_channel_end_push(uvm_push_t *push)
}
else if (uvm_conf_computing_mode_enabled(channel_manager->gpu) && !uvm_channel_is_sec2(channel)) {
if (uvm_channel_manager_is_wlc_ready(channel_manager)) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);
// This codepath should only be used during initialization and thus
// NEVER return an error.
UVM_ASSERT(status == NV_OK);
internal_channel_submit_work_indirect_wlc(push, cpu_put, new_cpu_put);
}
else {
// submitting via SEC2 starts a push, postpone until this push is ended
@ -963,7 +1136,7 @@ void uvm_channel_end_push(uvm_push_t *push)
wmb();
if (needs_sec2_work_submit) {
NV_STATUS status = internal_channel_submit_work_indirect(push, cpu_put, new_cpu_put);
NV_STATUS status = internal_channel_submit_work_indirect_sec2(push, cpu_put, new_cpu_put);
// This codepath should only be used during initialization and thus
// NEVER return an error.
@ -1007,7 +1180,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
uvm_channel_type_t indirect_channel_type = uvm_channel_manager_is_wlc_ready(channel->pool->manager) ?
UVM_CHANNEL_TYPE_WLC :
UVM_CHANNEL_TYPE_SEC2;
NvU64 gpfifo_gpu = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));
UVM_ASSERT(!uvm_channel_is_sec2(channel));
@ -1026,17 +1198,26 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
if (status != NV_OK)
return status;
// TODO: Bug 2649842: RFE - Investigate using 64-bit semaphore
// SEC2 needs encrypt decrypt to be 16B aligned GPFIFO entries are only 8B
// Use 2x semaphore release to set the values directly.
// One 64bit semahore release can be used instead once implemented.
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu, NvU64_LO32(entry->control_value));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
do_semaphore_release(&indirect_push, gpfifo_gpu + 4, NvU64_HI32(entry->control_value));
if (uvm_channel_is_sec2(indirect_push.channel)) {
set_gpfifo_via_sec2(&indirect_push, channel, old_cpu_put, entry->control_value);
update_gpput_via_sec2(&indirect_push, channel, new_gpu_put);
} else {
uvm_gpu_t *gpu = uvm_push_get_gpu(&indirect_push);
NvU64 gpfifo_gpu_va = channel->channel_info.gpFifoGpuVa + (old_cpu_put * sizeof(entry->control_value));
gpu->parent->ce_hal->memset_8(&indirect_push,
uvm_gpu_address_virtual(gpfifo_gpu_va),
entry->control_value,
sizeof(entry->control_value));
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
}
uvm_push_set_flag(&indirect_push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
do_semaphore_release(&indirect_push, channel->channel_info.gpPutGpuVa, new_gpu_put);
do_semaphore_release(&indirect_push,
channel->channel_info.workSubmissionOffsetGpuVa,
channel->channel_info.workSubmissionToken);
status = uvm_push_end_and_wait(&indirect_push);
if (status != NV_OK)
@ -1044,9 +1225,6 @@ static NV_STATUS submit_ctrl_gpfifo_indirect(uvm_channel_t *channel,
channel->conf_computing.gpu_put = new_gpu_put;
// The above SEC2 work transferred everything
// Ring the doorbell
UVM_GPU_WRITE_ONCE(*channel->channel_info.workSubmissionOffset, channel->channel_info.workSubmissionToken);
return NV_OK;
}
@ -1445,17 +1623,21 @@ static NV_STATUS alloc_conf_computing_buffers_semaphore(uvm_channel_t *channel)
static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
{
uvm_gpu_t *gpu = channel->pool->manager->gpu;
size_t aligned_wlc_push_size = UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT);
NV_STATUS status = uvm_rm_mem_alloc_and_map_cpu(gpu,
UVM_RM_MEM_TYPE_SYS,
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE * 2,
PAGE_SIZE,
&channel->conf_computing.static_pb_unprotected_sysmem);
if (status != NV_OK)
return status;
// Both pushes will be targets for SEC2 decrypt operations and have to
// be aligned for SEC2. The first push location will also be a target
// for CE decrypt operation and has to be aligned for CE decrypt.
status = uvm_rm_mem_alloc(gpu,
UVM_RM_MEM_TYPE_GPU,
UVM_MAX_WLC_PUSH_SIZE * 2,
UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT) * 2,
UVM_CONF_COMPUTING_BUF_ALIGNMENT,
&channel->conf_computing.static_pb_protected_vidmem);
if (status != NV_OK)
@ -1464,16 +1646,16 @@ static NV_STATUS alloc_conf_computing_buffers_wlc(uvm_channel_t *channel)
channel->conf_computing.static_pb_unprotected_sysmem_cpu =
uvm_rm_mem_get_cpu_va(channel->conf_computing.static_pb_unprotected_sysmem);
channel->conf_computing.static_pb_unprotected_sysmem_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + UVM_MAX_WLC_PUSH_SIZE;
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu + aligned_wlc_push_size;
// The location below is only used for launch pushes but reuses
// the same sysmem allocation
channel->conf_computing.launch_auth_tag_cpu =
(char*)channel->conf_computing.static_pb_unprotected_sysmem_cpu +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
channel->conf_computing.launch_auth_tag_gpu_va =
uvm_rm_mem_get_gpu_uvm_va(channel->conf_computing.static_pb_unprotected_sysmem, gpu) +
UVM_MAX_WLC_PUSH_SIZE + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
aligned_wlc_push_size + UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
channel->conf_computing.static_pb_protected_sysmem = uvm_kvmalloc(UVM_MAX_WLC_PUSH_SIZE + UVM_PAGE_SIZE_4K);
if (!channel->conf_computing.static_pb_protected_sysmem)
@ -2576,7 +2758,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
// "decrypt_push" represents WLC decrypt push, constructed using fake_push.
// Copied to wlc_pb_base + UVM_MAX_WLC_PUSH_SIZE, as the second of the two
// pushes that make the WLC fixed schedule.
NvU64 decrypt_push_protected_gpu = protected_vidmem + UVM_MAX_WLC_PUSH_SIZE;
NvU64 decrypt_push_protected_gpu = UVM_ALIGN_UP(protected_vidmem + UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT);
NvU64 decrypt_push_unprotected_gpu = unprotected_sysmem_gpu + gpfifo_size;
void *decrypt_push_unprotected_cpu = (char*)gpfifo_unprotected_cpu + gpfifo_size;
@ -2587,7 +2769,7 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
BUILD_BUG_ON(sizeof(*wlc_gpfifo_entries) != sizeof(*wlc->channel_info.gpFifoEntries));
UVM_ASSERT(uvm_channel_is_wlc(wlc));
UVM_ASSERT(tag_offset == UVM_MAX_WLC_PUSH_SIZE);
UVM_ASSERT(tag_offset == UVM_ALIGN_UP(UVM_MAX_WLC_PUSH_SIZE, UVM_CONF_COMPUTING_AUTH_TAG_ALIGNMENT));
// The WLC schedule consists of two parts, so the number of entries needs to
// be even. This also guarantees that the size is 16B-aligned.
@ -2692,11 +2874,9 @@ static NV_STATUS setup_wlc_schedule(uvm_channel_t *wlc)
// Prime the WLC by setting "PUT" two steps ahead. Reuse the current
// cpu_put value that was used during channel initialization.
// Don't update wlc->cpu_put, it will be used to track
// submitted pushes as any other channel.
do_semaphore_release(&sec2_push,
wlc->channel_info.gpPutGpuVa,
(wlc->cpu_put + 2) % wlc->num_gpfifo_entries);
// Don't update wlc->cpu_put, it will be used to track submitted pushes
// as any other channel.
update_gpput_via_sec2(&sec2_push, wlc, (wlc->cpu_put + 2) % wlc->num_gpfifo_entries);
status = uvm_push_end_and_wait(&sec2_push);
@ -3048,9 +3228,7 @@ static void channel_manager_stop_wlc(uvm_channel_manager_t *manager)
// Every gpfifo entry advances the gpu put of WLC by two so the current
// value is: (cpu_put * 2) % num_gpfifo_entries and it's ahead of the
// get pointer by 2.
do_semaphore_release(&push,
channel->channel_info.gpPutGpuVa,
(channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
update_gpput_via_sec2(&push, channel, (channel->cpu_put * 2 - 2) % channel->num_gpfifo_entries);
}
status = uvm_push_end_and_wait(&push);

View File

@ -378,11 +378,12 @@ void uvm_conf_computing_log_gpu_encryption(uvm_channel_t *channel, UvmCslIv *iv)
NV_STATUS status;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslLogDeviceEncryption(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_DECRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}
@ -391,11 +392,12 @@ void uvm_conf_computing_acquire_encryption_iv(uvm_channel_t *channel, UvmCslIv *
NV_STATUS status;
uvm_mutex_lock(&channel->csl.ctx_lock);
status = nvUvmInterfaceCslAcquireEncryptionIv(&channel->csl.ctx, iv);
status = nvUvmInterfaceCslIncrementIv(&channel->csl.ctx, UVM_CSL_OPERATION_ENCRYPT, 1, iv);
uvm_mutex_unlock(&channel->csl.ctx_lock);
// nvUvmInterfaceLogDeviceEncryption fails when a 64-bit encryption counter
// overflows. This is not supposed to happen on CC.
// TODO: Bug 4014720: If nvUvmInterfaceCslIncrementIv returns with
// NV_ERR_INSUFFICIENT_RESOURCES then the IV needs to be rotated via
// nvUvmInterfaceCslRotateIv.
UVM_ASSERT(status == NV_OK);
}
@ -439,6 +441,8 @@ NV_STATUS uvm_conf_computing_cpu_decrypt(uvm_channel_t *channel,
(const NvU8 *) src_cipher,
src_iv,
(NvU8 *) dst_plain,
NULL,
0,
(const NvU8 *) auth_tag_buffer);
uvm_mutex_unlock(&channel->csl.ctx_lock);

View File

@ -42,9 +42,11 @@
// Use sizeof(UvmCslIv) to refer to the IV size.
#define UVM_CONF_COMPUTING_IV_ALIGNMENT 16
// SEC2 decrypt operation buffers are required to be 16-bytes aligned. CE
// encrypt/decrypt can be unaligned if the buffer lies in a single 32B segment.
// Otherwise, they need to be 32B aligned.
// SEC2 decrypt operation buffers are required to be 16-bytes aligned.
#define UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT 16
// CE encrypt/decrypt can be unaligned if the entire buffer lies in a single
// 32B segment. Otherwise, it needs to be 32B aligned.
#define UVM_CONF_COMPUTING_BUF_ALIGNMENT 32
#define UVM_CONF_COMPUTING_DMA_BUFFER_SIZE UVM_VA_BLOCK_SIZE
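
As a hedged illustration of the CE rule above (the helper below is not part of this file), a buffer may skip the 32B alignment requirement only when it lies entirely within one 32B segment:

    static inline NvBool buffer_fits_one_32b_segment(NvU64 addr, NvU64 size)
    {
        // True when [addr, addr + size) does not cross a 32B boundary
        // (size is assumed to be non-zero).
        return (addr / UVM_CONF_COMPUTING_BUF_ALIGNMENT) ==
               ((addr + size - 1) / UVM_CONF_COMPUTING_BUF_ALIGNMENT);
    }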

View File

@ -2575,7 +2575,7 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
continue;
}
if (folio_test_swapcache(page_folio(src_page))) {
if (PageSwapCache(src_page)) {
// TODO: Bug 4050579: Remove this when swap cached pages can be
// migrated.
if (service_context) {

View File

@ -166,6 +166,7 @@ void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, N
NvU32 *csl_sign_init = push->next;
// Check that the provided alignment matches HW
BUILD_BUG_ON(UVM_CONF_COMPUTING_SEC2_BUF_ALIGNMENT != (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT < (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)));
BUILD_BUG_ON(UVM_CONF_COMPUTING_BUF_ALIGNMENT % (1 << HWSHIFT(CBA2, DECRYPT_COPY_DST_ADDR_LO, DATA)) != 0);

View File

@ -161,22 +161,22 @@
// * WFI: 8B
// Total: 64B
//
// Push space needed for secure work launch is 224B. The push is constructed
// Push space needed for secure work launch is 364B. The push is constructed
// in 'internal_channel_submit_work_indirect' and 'uvm_channel_end_push'
// * CE decrypt (of indirect PB): 56B
// * 2*semaphore release (indirect GPFIFO entry): 2*24B
// * memset_8 (indirect GPFIFO entry): 44B
// * semaphore release (indirect GPPUT): 24B
// * semaphore release (indirect doorbell): 24B
// Appendix added in 'uvm_channel_end_push':
// * semaphore release (WLC tracking): 168B
// * semaphore increment (memcopy): 24B
// * semaphore release (payload): 24B
// * notifier memset: 40B
// * payload encryption: 64B
// * notifier memset: 40B
// * semaphore increment (LCIC GPPUT): 24B
// * semaphore release (LCIC doorbell): 24B
// Total: 368B
#define UVM_MAX_WLC_PUSH_SIZE (368)
// Total: 364B
#define UVM_MAX_WLC_PUSH_SIZE (364)
// Push space needed for static LCIC schedule, as initialized in
// 'setup_lcic_schedule':
@ -184,7 +184,7 @@
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (WLC GPPUT): 24B
// * semaphore increment (LCIC tracking): 160B
// * semaphore increment (memcopy): 24B
// * semaphore increment (payload): 24B
// * notifier memcopy: 36B
// * payload encryption: 64B
// * notifier memcopy: 36B

View File

@ -213,6 +213,7 @@ done:
typedef enum
{
MEM_ALLOC_TYPE_SYSMEM_DMA,
MEM_ALLOC_TYPE_SYSMEM_PROTECTED,
MEM_ALLOC_TYPE_VIDMEM_PROTECTED
} mem_alloc_type_t;
@ -274,7 +275,11 @@ static NV_STATUS alloc_and_init_mem(uvm_gpu_t *gpu, uvm_mem_t **mem, size_t size
TEST_NV_CHECK_GOTO(ce_memset_gpu(gpu, *mem, size, 0xdead), err);
}
else {
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
if (type == MEM_ALLOC_TYPE_SYSMEM_DMA)
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem_dma(size, gpu, NULL, mem));
else
TEST_NV_CHECK_RET(uvm_mem_alloc_sysmem(size, NULL, mem));
TEST_NV_CHECK_GOTO(uvm_mem_map_cpu_kernel(*mem), err);
TEST_NV_CHECK_GOTO(uvm_mem_map_gpu_kernel(*mem, gpu), err);
write_range_cpu(*mem, size, 0xdeaddead);
@ -405,48 +410,6 @@ static void gpu_decrypt(uvm_push_t *push,
}
}
// This test only uses sysmem so that we can use the CPU for encryption and SEC2
// for decryption, i.e., the test doesn't depend on any other GPU engine for
// the encryption operation (refer to test_cpu_to_gpu_roundtrip()). This is not
// how SEC2 is used in the driver. The intended SEC2 usage is to decrypt from
// unprotected sysmem to protected vidmem, which is tested in
// test_cpu_to_gpu_roundtrip().
static NV_STATUS test_cpu_to_gpu_sysmem(uvm_gpu_t *gpu, size_t copy_size, size_t size)
{
NV_STATUS status = NV_OK;
uvm_mem_t *src_plain = NULL;
uvm_mem_t *cipher = NULL;
uvm_mem_t *dst_plain = NULL;
uvm_mem_t *auth_tag_mem = NULL;
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
write_range_cpu(dst_plain, size, 0xA5A5A5A5);
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_SEC2, &push, "enc(cpu)_dec(gpu)"), out);
cpu_encrypt(push.channel, cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, cipher, auth_tag_mem, size, copy_size);
uvm_push_end_and_wait(&push);
TEST_CHECK_GOTO(mem_match(src_plain, dst_plain), out);
out:
uvm_mem_free(auth_tag_mem);
uvm_mem_free(cipher);
uvm_mem_free(dst_plain);
uvm_mem_free(src_plain);
return status;
}
// This test depends on the CE for the encryption, so we assume tests from
// uvm_ce_test.c have successfully passed.
static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, size_t size)
@ -461,19 +424,16 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
size_t auth_tag_buffer_size = (size / copy_size) * UVM_CONF_COMPUTING_AUTH_TAG_SIZE;
uvm_push_t push;
UvmCslIv *decrypt_iv;
uvm_tracker_t tracker;
decrypt_iv = uvm_kvmalloc_zero((size / copy_size) * sizeof(UvmCslIv));
if (!decrypt_iv)
return NV_ERR_NO_MEMORY;
uvm_tracker_init(&tracker);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_plain, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &src_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_cipher, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain, size, MEM_ALLOC_TYPE_VIDMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &dst_plain_cpu, size, MEM_ALLOC_TYPE_SYSMEM_PROTECTED), out);
TEST_NV_CHECK_GOTO(alloc_and_init_mem(gpu, &auth_tag_mem, auth_tag_buffer_size, MEM_ALLOC_TYPE_SYSMEM_DMA), out);
write_range_cpu(src_plain, size, uvm_get_stale_thread_id());
@ -483,14 +443,13 @@ static NV_STATUS test_cpu_to_gpu_roundtrip(uvm_gpu_t *gpu, size_t copy_size, siz
cpu_encrypt(push.channel, src_cipher, src_plain, auth_tag_mem, size, copy_size);
gpu_decrypt(&push, dst_plain, src_cipher, auth_tag_mem, size, copy_size);
uvm_push_end(&push);
TEST_NV_CHECK_GOTO(uvm_tracker_add_push(&tracker, &push), out);
TEST_NV_CHECK_GOTO(uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_GPU_TO_CPU,
&tracker,
&push,
"enc(gpu)_dec(cpu)"),
// Wait for SEC2 before launching the CE part.
// SEC2 is only allowed to release semaphores in unprotected sysmem,
// and CE can only acquire semaphores in protected vidmem.
TEST_NV_CHECK_GOTO(uvm_push_end_and_wait(&push), out);
TEST_NV_CHECK_GOTO(uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_GPU_TO_CPU, &push, "enc(gpu)_dec(cpu)"),
out);
gpu_encrypt(&push, dst_cipher, dst_plain, decrypt_iv, auth_tag_mem, size, copy_size);
@ -521,8 +480,6 @@ out:
uvm_kvfree(decrypt_iv);
uvm_tracker_deinit(&tracker);
return status;
}
@ -545,7 +502,6 @@ static NV_STATUS test_encryption_decryption(uvm_gpu_t *gpu)
UVM_ASSERT(size % copy_sizes[i] == 0);
TEST_NV_CHECK_RET(test_cpu_to_gpu_sysmem(gpu, copy_sizes[i], size));
TEST_NV_CHECK_RET(test_cpu_to_gpu_roundtrip(gpu, copy_sizes[i], size));
}

View File

@ -69,6 +69,14 @@ static NV_STATUS test_tracker_completion(uvm_va_space_t *va_space)
gpu = uvm_va_space_find_first_gpu(va_space);
TEST_CHECK_RET(gpu != NULL);
// TODO: Bug 4008734: [UVM][HCC] Extend secure tracking semaphore mechanism
// to all semaphores.
// This test allocates a semaphore in vidmem and then releases it from the
// CPU. SEC2 channels cannot target semaphores in vidmem. Moreover, the CPU
// cannot directly release values to vidmem for CE channels.
if (uvm_conf_computing_mode_enabled(gpu))
return NV_OK;
TEST_NV_CHECK_RET(uvm_gpu_semaphore_alloc(gpu->semaphore_pool, &sema));
uvm_tracker_init(&tracker);

View File

@ -7189,6 +7189,7 @@ static NV_STATUS block_map_gpu_to(uvm_va_block_t *va_block,
}
static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_va_block_context_t *va_block_context,
const uvm_va_policy_t *policy,
uvm_processor_id_t id,
uvm_processor_mask_t *allowed_mask)
@ -7200,7 +7201,10 @@ static void map_get_allowed_destinations(uvm_va_block_t *block,
uvm_processor_mask_zero(allowed_mask);
uvm_processor_mask_set(allowed_mask, policy->preferred_location);
}
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) || uvm_id_equal(policy->preferred_location, id)) &&
else if ((uvm_va_policy_is_read_duplicate(policy, va_space) ||
(uvm_id_equal(policy->preferred_location, id) &&
!is_uvm_fault_force_sysmem_set() &&
!uvm_hmm_must_use_sysmem(block, va_block_context))) &&
uvm_va_space_processor_has_memory(va_space, id)) {
// When operating under read-duplication we should only map the local
// processor to cause fault-and-duplicate of remote pages.
@ -7285,7 +7289,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
// Map per resident location so we can more easily detect physically-
// contiguous mappings.
map_get_allowed_destinations(va_block, va_block_context->policy, id, &allowed_destinations);
map_get_allowed_destinations(va_block, va_block_context, va_block_context->policy, id, &allowed_destinations);
for_each_closest_id(resident_id, &allowed_destinations, id, va_space) {
if (UVM_ID_IS_CPU(id)) {

View File

@ -418,15 +418,6 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_global_processor_mask_t retained_gpus;
LIST_HEAD(deferred_free_list);
// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);
// Remove the VA space from the global list before we start tearing things
// down so other threads can't see the VA space in a partially-valid state.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@ -532,7 +523,14 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_deferred_free_object_list(&deferred_free_list);
// MM FD teardown should already have destroyed va_space_mm
// Normally we'd expect this to happen as part of uvm_mm_release()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
// call uvm_api_mm_initialize successfully because the UVM
// file-descriptor has been released.
if (va_space->va_space_mm.state == UVM_VA_SPACE_MM_STATE_UNINITIALIZED)
uvm_va_space_mm_unregister(va_space);
UVM_ASSERT(!uvm_va_space_mm_alive(&va_space->va_space_mm));
uvm_mutex_lock(&g_uvm_global.global_lock);

View File

@ -1396,6 +1396,8 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
nv->flags |= NV_FLAG_OPEN;
rm_request_dnotifier_state(sp, nv);
/*
* Now that RM init is done, allow dynamic power to control the GPU in FINE
* mode, if enabled. (If the mode is COARSE, this unref will do nothing

View File

@ -290,10 +290,6 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslLogDeviceEncryption(struct ccslContext_t *ctx,
NvU8 *decryptIv);
NV_STATUS nvGpuOpsCcslAcquireEncryptionIv(struct ccslContext_t *ctx,
NvU8 *encryptIv);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@ -312,6 +308,8 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NvU32 bufferSize,
@ -320,5 +318,9 @@ NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 *messageNum);
NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
#endif /* _NV_GPU_OPS_H_*/

View File

@ -1504,44 +1504,18 @@ void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext)
}
EXPORT_SYMBOL(nvUvmInterfaceDeinitCslContext);
NV_STATUS nvUvmInterfaceCslLogDeviceEncryption(UvmCslContext *uvmCslContext,
UvmCslIv *decryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_log_device_encryption(sp, uvmCslContext->ctx, (NvU8 *)decryptIv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslLogDeviceEncryption);
NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
UvmCslDirection direction)
UvmCslOperation operation)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, direction);
status = rm_gpu_ops_ccsl_rotate_iv(sp, uvmCslContext->ctx, operation);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslRotateIv);
NV_STATUS nvUvmInterfaceCslAcquireEncryptionIv(UvmCslContext *uvmCslContext,
UvmCslIv *encryptIv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
BUILD_BUG_ON(NV_OFFSETOF(UvmCslIv, fresh) != sizeof(encryptIv->iv));
status = rm_gpu_ops_ccsl_acquire_encryption_iv(sp, uvmCslContext->ctx, (NvU8*)encryptIv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslAcquireEncryptionIv);
NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
@ -1566,6 +1540,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer)
{
NV_STATUS status;
@ -1577,6 +1553,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
inputBuffer,
(NvU8 *)decryptIv,
outputBuffer,
addAuthData,
addAuthDataSize,
authTagBuffer);
return status;
@ -1598,18 +1576,32 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
EXPORT_SYMBOL(nvUvmInterfaceCslSign);
NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
UvmCslDirection direction,
UvmCslOperation operation,
NvU64 *messageNum)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, direction, messageNum);
status = rm_gpu_ops_ccsl_query_message_pool(sp, uvmCslContext->ctx, operation, messageNum);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslQueryMessagePool);
NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU64 increment,
UvmCslIv *iv)
{
NV_STATUS status;
nvidia_stack_t *sp = uvmCslContext->nvidia_stack;
status = rm_gpu_ops_ccsl_increment_iv(sp, uvmCslContext->ctx, operation, increment, (NvU8 *)iv);
return status;
}
EXPORT_SYMBOL(nvUvmInterfaceCslIncrementIv);
#else // NV_UVM_ENABLE
NV_STATUS nv_uvm_suspend(void)

View File

@ -30,6 +30,7 @@
#define GPS_FUNC_SUPPORT 0x00000000 // Bit list of supported functions
#define GPS_FUNC_GETOBJBYTYPE 0x00000010 // Fetch any specific Object by Type
#define GPS_FUNC_GETALLOBJS 0x00000011 // Fetch all Objects
#define GPS_FUNC_REQUESTDXSTATE 0x00000012 // Request D-Notifier state
#define GPS_FUNC_GETCALLBACKS 0x00000013 // Get system requested callbacks
#define GPS_FUNC_PCONTROL 0x0000001C // GPU power control function
#define GPS_FUNC_PSHARESTATUS 0x00000020 // Get system requested Power Steering settings

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r535_87
#define NV_BUILD_BRANCH r536_08
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r535_87
#define NV_PUBLIC_BRANCH r536_08
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r535_87-122"
#define NV_BUILD_CHANGELIST_NUM (32882771)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r536_08-145"
#define NV_BUILD_CHANGELIST_NUM (32940552)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r535_87-122"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32882771)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r536_08-145"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32940552)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r535_87-4"
#define NV_BUILD_CHANGELIST_NUM (32875904)
#define NV_BUILD_BRANCH_VERSION "r536_08-4"
#define NV_BUILD_CHANGELIST_NUM (32940552)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "535.93"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32875904)
#define NV_BUILD_NAME "536.19"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (32940552)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.43.02"
#define NV_VERSION_STRING "535.54.03"
#else

View File

@ -120,12 +120,14 @@ struct _NVLOG_BUFFER
#if !PORT_IS_KERNEL_BUILD
typedef struct PORT_SPINLOCK PORT_SPINLOCK;
typedef struct PORT_MUTEX PORT_MUTEX;
#else
#include "nvport/nvport.h"
#endif
#elif !defined(PORT_IS_KERNEL_BUILD)
typedef struct PORT_SPINLOCK PORT_SPINLOCK;
typedef struct PORT_MUTEX PORT_MUTEX;
#else
#include "nvport/nvport.h"
#endif
@ -143,11 +145,33 @@ typedef struct _NVLOG_LOGGER
NvU32 nextFree;
/** Total number of free buffer slots */
NvU32 totalFree;
/** Lock for all buffer operations */
/** Lock for some buffer operations */
PORT_SPINLOCK* mainLock;
/** Lock for creating/deleting pBuffers and accessing them from RmCtrls */
PORT_MUTEX* buffersLock;
} NVLOG_LOGGER;
extern NVLOG_LOGGER NvLogLogger;
/**
* NvLog uses two locks:
* - NVLOG_LOGGER::mainLock is used to protect some accesses to pBuffers, or
* an individual pBuffers entry depending on locking flags.
* - NVLOG_LOGGER::buffersLock is used to protect creating/deleting pBuffers and accessing them
* from certain RmCtrl handlers.
*
* Historically, in most contexts, obtaining the RMAPI lock would suffice, and mainLock would
* optionally be used for certain buffers. The NV_ESC_RM_NVLOG_CTRL ioctl cannot take the RMAPI
* lock but needs to access NvLog. That access might race if it happens at an inopportune time:
* e.g. if the ioctl is called during RM init while KGSP creates/deletes the GSP NvLog buffers.
* Using buffersLock is thus necessary to resolve the potential race.
*
* This leads to an unfortunate sequence where mainLock and buffersLock are nested. The latter lock
* cannot be removed as it is used in IRQ paths.
*
* This should be refactored to use a single RWLock that does conditional acquire in possible IRQ
* paths.
*/
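Not part of the change: a minimal sketch of the nesting described above, assuming the usual nvport sync entry points; the mutex is taken first because a mutex may sleep and therefore cannot be acquired while the spinlock is held.
portSyncMutexAcquire(NvLogLogger.buffersLock);    // outer: guards pBuffers create/delete
portSyncSpinlockAcquire(NvLogLogger.mainLock);    // inner: guards the buffer contents
/* ... read or write the selected pBuffers entry ... */
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);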
//
// Buffer flags
//

View File

@ -39,8 +39,12 @@
#define NV_EP_PCFG_GPU_REVISION_ID_AND_CLASSCODE_BASE_CLASSCODE 31:24 /* R-IVF */
#define NV_EP_PCFG_GPU_REVISION_ID_AND_CLASSCODE_BASE_CLASSCODE_3D 0x00000003 /* R-I-V */
#define NV_EP_PCFG_GPU_BARREG0 0x00000010 /* RW-4R */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE 2:1 /* R-IVF */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE_32BIT 0x00000000 /* R-I-V */
#define NV_EP_PCFG_GPU_BARREG0_REG_ADDR_TYPE_64BIT 0x00000002 /* R---V */
#define NV_EP_PCFG_GPU_BARREG0_REG_BASE_ADDRESS 31:18 /* RWIVF */
#define NV_EP_PCFG_GPU_BARREG0_REG_BASE_ADDRESS_INIT 0x00000000 /* RWI-V */
#define NV_EP_PCFG_GPU_BARREG5 0x00000024 /* RW-4R */
#define NV_EP_PCFG_GPU_SUBSYSTEM_ID 0x0000002C /* R--4R */
#define NV_EP_PCFG_GPU_MSI_64_HEADER 0x00000048 /* RW-4R */
#define NV_EP_PCFG_GPU_MSI_64_HEADER_MSI_ENABLE 16:16 /* RWIVF */
@ -79,6 +83,11 @@
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_REPLAY_NUM_ROLLOVER 8:8 /* RWCVF */
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_REPLAY_TIMER_TIMEOUT 12:12 /* RWCVF */
#define NV_EP_PCFG_GPU_CORRECTABLE_ERROR_STATUS_ADVISORY_NON_FATAL_ERROR 13:13 /* RWCVF */
#define NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF 0x0000025C /* R--4R */
#define NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF_TOTAL_VFS 31:16 /* R-EVF */
#define NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE 0x00000264 /* R--4R */
#define NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE_FIRST_VF_OFFSET 15:0 /* R-IVF */
#define NV_EP_PCFG_GPU_VF_BAR0 0x00000274 /* RW-4R */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC 0x000002B4 /* R--4R */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC_FAULT_FUSE_POD 0:0 /* R-CVF */
#define NV_EP_PCFG_GPU_VSEC_DEBUG_SEC_FAULT_FUSE_SCPM 1:1 /* R-CVF */
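Not part of the change: a short sketch of how these manual-style HI:LO bit-field defines are typically consumed, assuming the DRF helpers from nvmisc.h used throughout this tree; regVal is a hypothetical config-space read.
NvU32 regVal   = 0x00200000;   // hypothetical value read from offset 0x25C
NvU32 totalVfs = DRF_VAL(_EP_PCFG_GPU, _SRIOV_INIT_TOT_VF, _TOTAL_VFS, regVal);
// Plain-C form of the same 31:16 extraction; both yield 0x20 (32 VFs).
NvU32 totalVfsPlain = (regVal >> 16) & 0xFFFF;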

View File

@ -23,9 +23,18 @@
#ifndef __tu102_dev_nv_xve_h__
#define __tu102_dev_nv_xve_h__
#define NV_PCFG 0x00088FFF:0x00088000 /* RW--D */
#define NV_PCFG 0x00088FFF:0x00088000 /* RW--D */
#define NV_XVE_MSIX_CAP_HDR 0x000000C8 /* RW-4R */
#define NV_XVE_MSIX_CAP_HDR_ENABLE 31:31 /* RWIVF */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_ENABLED 0x00000001 /* RW--V */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_DISABLED 0x00000000 /* RWI-V */
#define NV_XVE_SRIOV_CAP_HDR3 0x00000BD8 /* R--4R */
#define NV_XVE_SRIOV_CAP_HDR3_TOTAL_VFS 31:16 /* R-EVF */
#define NV_XVE_SRIOV_CAP_HDR5 0x00000BE0 /* R--4R */
#define NV_XVE_SRIOV_CAP_HDR5_FIRST_VF_OFFSET 15:0 /* R-IVF */
#define NV_XVE_SRIOV_CAP_HDR9 0x00000BF0 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR10 0x00000BF4 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR11_VF_BAR1_HI 0x00000BF8 /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR12 0x00000BFC /* RW-4R */
#define NV_XVE_SRIOV_CAP_HDR13_VF_BAR2_HI 0x00000C00 /* RW-4R */
#endif // __tu102_dev_nv_xve_h__

View File

@ -1505,7 +1505,7 @@ nvswitch_reset_and_drain_links_ls10
// DEBUG_CLEAR (0x144) register
// - Assert NPortWarmReset[i] using the WARMRESET (0x140) register
//
// nvswitch_soe_issue_nport_reset_ls10(device, link);
nvswitch_soe_issue_nport_reset_ls10(device, link);
//
// Step 5.0 : Issue Minion request to perform the link reset sequence
@ -1583,7 +1583,7 @@ nvswitch_reset_and_drain_links_ls10
// - Assert NPORT INITIALIZATION and program the state tracking RAMS
// - Restore NPORT state after reset
//
// nvswitch_soe_restore_nport_state_ls10(device, link);
nvswitch_soe_restore_nport_state_ls10(device, link);
// Step 7.0 : Re-program the routing table for DBEs

View File

@ -72,6 +72,19 @@
* identity mapped. To use this feature, users need to pass in the
* hVaspace with identity mapped addresses for the entire memory during
* construct.
* PIPELINED
* This flag allows the copy/memset operation to be pipelined with previous DMA operations on the same channel.
* It means that its reads/writes are allowed to happen before the writes of preceding operations are TLB-acked.
* The flag can be useful when dealing with non-intersecting async operations,
* but it can result in races when two async CE operations target the same allocation and the second operation uses the flag.
* Race example:
* 1. async copy A -> B
* 2. pipelined copy B -> C
* Here copy 2 can read B before copy 1 finishes writing it, which will result in C containing invalid data.
* Technical details:
* By default, the first _LAUNCH_DMA method of a CE operation is marked as _TRANSFER_TYPE_NON_PIPELINED, which the flag overrides.
* Subsequent _LAUNCH_DMA methods belonging to the same operation use _TRANSFER_TYPE_PIPELINED, as each of these methods should
* target different addresses
*
* submittedWorkId [OUT]
* The work submission token users can poll on to wait for work
@ -79,8 +92,9 @@
*/
#define NV0050_CTRL_MEMSET_FLAGS_DEFAULT 0
#define NV0050_CTRL_MEMSET_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMSET_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMSET_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMSET_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMSET_FLAGS_PIPELINED NVBIT(2)
#define NV0050_CTRL_CMD_MEMSET (0x500101U) /* finn: Evaluated from "(FINN_NV_CE_UTILS_UTILS_INTERFACE_ID << 8) | NV0050_CTRL_MEMSET_PARAMS_MESSAGE_ID" */
@ -98,7 +112,7 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
/*
* NV0050_CTRL_CMD_MEMCOPY
*
* Copies from a source memory to a destination memory and releases a semaphore
* Copies from a source memory to a destination memory and releases a semaphore
* on completion
*
* hDstMemory [IN]
@ -131,6 +145,19 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
* identity mapped. To use this feature, users need to pass in the
* hVaspace with identity mapped addresses for the entire memory during
* construct.
* PIPELINED
* This flag allows the copy/memset operation to be pipelined with previous dma operations on the same channel
* It means that its reads/writes are allowed to happen before the writes of preceding operations are TLB-acked.
* The flag can be useful when dealing with non-intersecting async operations,
* but it can result in races when two async CE operations target the same allocation and the second operation uses the flag.
* Race example:
* 1. async copy A -> B
* 2. pipelined copy B -> C
* Here copy 2 can read B before copy 1 finishes writing it, which will result in C containing invalid data.
* Technical details:
* By default, the first _LAUNCH_DMA method of a CE operation is marked as _TRANSFER_TYPE_NON_PIPELINED, which the flag overrides.
* Subsequent _LAUNCH_DMA methods belonging to the same operation use _TRANSFER_TYPE_PIPELINED, as each of these methods should
* target different addresses
*
* submittedWorkId [OUT]
* The work submission token users can poll on to wait for work
@ -138,8 +165,9 @@ typedef struct NV0050_CTRL_MEMSET_PARAMS {
*/
#define NV0050_CTRL_MEMCOPY_FLAGS_DEFAULT 0
#define NV0050_CTRL_MEMCOPY_FLAGS_ASYNC NVBIT(1)
#define NV0050_CTRL_MEMCOPY_FLAGS_VIRTUAL NVBIT(2)
#define NV0050_CTRL_MEMCOPY_FLAGS_ASYNC NVBIT(0)
#define NV0050_CTRL_MEMCOPY_FLAGS_VIRTUAL NVBIT(1)
#define NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED NVBIT(2)
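Not part of the change: a minimal sketch of flag selection for the race described in the comment above; only the flag names defined in this header are used, everything else is an assumption.
// Copy 1 (A -> B) and copy 2 (B -> C) overlap on B, so copy 2 must not be pipelined:
NvU32 copy1Flags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC;
NvU32 copy2Flags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC;   // no _PIPELINED: wait for copy 1's writes
// An async copy that does not intersect any in-flight operation can add the hint:
NvU32 independentFlags = NV0050_CTRL_MEMCOPY_FLAGS_ASYNC | NV0050_CTRL_MEMCOPY_FLAGS_PIPELINED;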
#define NV0050_CTRL_CMD_MEMCOPY (0x500102U) /* finn: Evaluated from "(FINN_NV_CE_UTILS_UTILS_INTERFACE_ID << 8 | NV0050_CTRL_MEMCOPY_PARAMS_MESSAGE_ID)" */
@ -157,12 +185,12 @@ typedef struct NV0050_CTRL_MEMCOPY_PARAMS {
/*
* NV0050_CTRL_CMD_CHECK_PROGRESS
*
*
* Check if a previously submitted work item has been completed by HW.
*
* submittedWorkId [IN]
* The work submission token users can poll on to wait for work
* completed by CE.
* completed by CE.
*
*/
#define NV0050_CTRL_CHECK_PROGRESS_RESULT_DEFAULT 0

View File

@ -208,11 +208,11 @@ typedef struct NV2080_CTRL_EVENT_SET_SEMAPHORE_MEMORY_PARAMS {
* guestMSIData
* This parameter indicates the MSI data set by the guest OS.
*
* vmIdType
* This parameter specifies the type of guest virtual machine identifier
* vgpuUuid
* This parameter specifies the uuid of vGPU assigned to VM.
*
* guestVmId
* This parameter specifies the guest virtual machine identifier
* domainId
* This parameter specifies the unique guest virtual machine identifier
*
* Possible status values returned are:
* NV_OK
@ -225,11 +225,11 @@ typedef struct NV2080_CTRL_EVENT_SET_SEMAPHORE_MEMORY_PARAMS {
typedef struct NV2080_CTRL_EVENT_SET_GUEST_MSI_PARAMS {
NV_DECLARE_ALIGNED(NvU64 guestMSIAddr, 8);
NvU32 guestMSIData;
NvHandle hSemMemory;
NvBool isReset;
VM_ID_TYPE vmIdType;
NV_DECLARE_ALIGNED(VM_ID guestVmId, 8);
NvU32 guestMSIData;
NvHandle hSemMemory;
NvBool isReset;
NvU8 vgpuUuid[VM_UUID_SIZE];
NV_DECLARE_ALIGNED(NvU64 domainId, 8);
} NV2080_CTRL_EVENT_SET_GUEST_MSI_PARAMS;

View File

@ -233,6 +233,13 @@ NvBool nvValidateSetLutCommonParams(
const NVDevEvoRec *pDevEvo,
const struct NvKmsSetLutCommonParams *pParams);
NvBool nvChooseColorRangeEvo(
enum NvKmsOutputTf tf,
const enum NvKmsDpyAttributeColorRangeValue requestedColorRange,
const enum NvKmsDpyAttributeCurrentColorSpaceValue colorSpace,
const enum NvKmsDpyAttributeColorBpcValue colorBpc,
enum NvKmsDpyAttributeColorRangeValue *pColorRange);
NvBool nvChooseCurrentColorSpaceAndRangeEvo(
const NVDpyEvoRec *pDpyEvo,
enum NvYuv420Mode yuv420Mode,

View File

@ -2111,6 +2111,38 @@ NvBool nvGetDefaultColorSpace(
return FALSE;
}
NvBool nvChooseColorRangeEvo(
enum NvKmsOutputTf tf,
const enum NvKmsDpyAttributeColorRangeValue requestedColorRange,
const enum NvKmsDpyAttributeCurrentColorSpaceValue colorSpace,
const enum NvKmsDpyAttributeColorBpcValue colorBpc,
enum NvKmsDpyAttributeColorRangeValue *pColorRange)
{
/* Hardware supports BPC_6 only for RGB */
nvAssert((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) ||
(colorBpc != NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6));
if ((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) &&
(colorBpc == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6)) {
/* At depth 18 only RGB and full range are allowed */
if (tf == NVKMS_OUTPUT_TF_PQ) {
/* NVKMS_OUTPUT_TF_PQ requires limited color range */
return FALSE;
}
*pColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL;
} else if ((colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr444) ||
(colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr422) ||
(colorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr420) ||
(tf == NVKMS_OUTPUT_TF_PQ)) {
/* Both YUV and NVKMS_OUTPUT_TF_PQ require a limited color range. */
*pColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_LIMITED;
} else {
*pColorRange = requestedColorRange;
}
return TRUE;
}
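Not part of the change: a brief usage sketch of the helper above for the rejected PQ + RGB 6 bpc case; the local variables are the only assumptions.
enum NvKmsDpyAttributeColorRangeValue colorRange;
// PQ requires limited range, but RGB at 6 bpc allows only full range,
// so the helper rejects this combination:
NvBool ok = nvChooseColorRangeEvo(NVKMS_OUTPUT_TF_PQ,
                                  NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL,
                                  NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB,
                                  NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6,
                                  &colorRange);   // ok == FALSE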
/*!
* Choose current colorSpace and colorRange for the given dpy based on
* the dpy's color format capabilities, the given modeset parameters (YUV420
@ -2206,23 +2238,9 @@ NvBool nvChooseCurrentColorSpaceAndRangeEvo(
}
}
/* Hardware supports BPC_6 only for RGB */
nvAssert((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) ||
(newColorBpc != NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6));
/*
* Both YUV and NVKMS_OUTPUT_TF_PQ require a limited color range.
*/
if ((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr444) ||
(newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr422) ||
(newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_YCbCr420) ||
(tf == NVKMS_OUTPUT_TF_PQ)) {
newColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_LIMITED;
} else if ((newColorSpace == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) &&
(newColorBpc == NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_BPC_6)) {
/* At depth 18 only RGB and full range are allowed */
newColorRange = NV_KMS_DPY_ATTRIBUTE_COLOR_RANGE_FULL;
} else {
newColorRange = requestedColorRange;
if (!nvChooseColorRangeEvo(tf, requestedColorRange, newColorSpace,
newColorBpc, &newColorRange)) {
return FALSE;
}
*pCurrentColorSpace = newColorSpace;

View File

@ -146,31 +146,23 @@ static NvBool UpdateProposedFlipStateOneApiHead(
if (!nvIsHDRCapableHead(pDispEvo, apiHead)) {
return FALSE;
}
/* NVKMS_OUTPUT_TF_PQ requires the RGB color space */
if (pProposedApiHead->hdr.colorSpace !=
NV_KMS_DPY_ATTRIBUTE_CURRENT_COLOR_SPACE_RGB) {
return FALSE;
}
}
if (!nvChooseCurrentColorSpaceAndRangeEvo(pDpyEvo,
pApiHeadState->timings.yuv420Mode,
pParams->tf.val,
pDpyEvo->requestedColorSpace,
pDpyEvo->requestedColorRange,
&pProposedApiHead->hdr.colorSpace,
&pProposedApiHead->hdr.colorBpc,
&pProposedApiHead->hdr.colorRange)) {
if (!nvChooseColorRangeEvo(pParams->tf.val,
pDpyEvo->requestedColorRange,
pProposedApiHead->hdr.colorSpace,
pProposedApiHead->hdr.colorBpc,
&pProposedApiHead->hdr.colorRange)) {
return FALSE;
}
}
/*
* Changes in colorSpace and colorBpc are not handled here. For DisplayPort,
* colorSpace and colorBpc cannot be changed without a modeset.
*/
if ((pProposedApiHead->hdr.colorSpace !=
pApiHeadState->attributes.colorSpace) ||
(pProposedApiHead->hdr.colorBpc !=
pApiHeadState->attributes.colorBpc)) {
return FALSE;
}
if (pParams->viewPortIn.specified) {
pProposedApiHead->dirty.viewPortPointIn = TRUE;
pProposedApiHead->viewPortPointIn = pParams->viewPortIn.point;

View File

@ -691,6 +691,10 @@ static void SetHdmiAudioMute(const NVDispEvoRec *pDispEvo,
static void EnableHdmiAudio(const NVDispEvoRec *pDispEvo,
const NvU32 head, const NvBool enable)
{
/*
* XXX Is it correct to use pktType_GeneralControl to mute/unmute
* the audio? pktType_GeneralControl controls both the audio and video data.
*/
static const NvU8 InfoframeMutePacket[] = {
pktType_GeneralControl, 0, 0, HDMI_GENCTRL_PACKET_MUTE_ENABLE, 0, 0, 0, 0,
0, 0
@ -998,12 +1002,13 @@ void nvHdmiDpConstructHeadAudioState(const NvU32 displayId,
return;
}
pAudioState->isAudioOverHdmi = nvDpyIsHdmiEvo(pDpyEvo);
if (FillELDBuffer(displayId,
nvConnectorUsesDPLib(pDpyEvo->pConnectorEvo),
&pDpyEvo->parsedEdid,
&pAudioState->eld,
&pAudioState->maxFreqSupported)) {
pAudioState->isAudioOverHdmi = nvDpyIsHdmiEvo(pDpyEvo);
pAudioState->enabled = TRUE;
}
}
@ -1197,37 +1202,25 @@ void nvHdmiDpEnableDisableAudio(const NVDispEvoRec *pDispEvo,
return;
}
if (!pHeadState->audio.enabled) {
if (enable) {
/* Make sure to remove corresponding audio device */
if (!enable) {
/*
* This is the pre-modeset code path. If the audio device is enabled
* (pHeadState->audio.enabled == TRUE), invalidate the ELD buffer
* before disabling audio.
*/
if (pHeadState->audio.enabled) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
nvDpyIdToNvU32(pConnectorEvo->displayId),
pHeadState->activeRmId,
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_POWER_ON_RESET);
} else {
/* Do nothing. The audio device is already in the disabled state. */
NV_ELD_PRE_MODESET);
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, FALSE /* enable */);
}
}
return;
}
/* Invalidate ELD buffer before disabling audio */
if (!enable) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_PRE_MODESET);
}
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, enable);
}
if (pHeadState->audio.isAudioOverHdmi) {
@ -1236,15 +1229,34 @@ void nvHdmiDpEnableDisableAudio(const NVDispEvoRec *pDispEvo,
SendHdmiGcp(pDispEvo, head, !enable /* avmute */);
}
/* Populate ELD buffer after enabling audio */
if (enable) {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
pHeadState->audio.maxFreqSupported,
&pHeadState->audio.eld,
NV_ELD_POST_MODESET);
/*
* This is the post-modeset code path. If the audio device is enabled
* (pHeadState->audio.enabled == TRUE), populate the ELD buffer after
* enabling audio; otherwise make sure to remove the corresponding audio
* device.
*/
if (pHeadState->audio.enabled) {
if (nvConnectorUsesDPLib(pConnectorEvo)) {
SetDpAudioEnable(pDispEvo, head, TRUE /* enable */);
}
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
pHeadState->activeRmId,
deviceEntry,
pHeadState->audio.maxFreqSupported,
&pHeadState->audio.eld,
NV_ELD_POST_MODESET);
} else {
RmSetELDAudioCaps(pDispEvo,
pConnectorEvo,
nvDpyIdToNvU32(pConnectorEvo->displayId),
deviceEntry,
0 /* maxFreqSupported */,
NULL /* pEld */,
NV_ELD_POWER_ON_RESET);
}
}
}

View File

@ -0,0 +1,43 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#if !defined(NV_IOCTL_NVLOG)
#define NV_IOCTL_NVLOG
#include <nvtypes.h>
#include "ctrl/ctrl0000/ctrl0000nvd.h"
typedef struct
{
NvU32 ctrl; // in
NvU32 status; // out
union // in/out
{
NV0000_CTRL_NVD_GET_NVLOG_INFO_PARAMS getNvlogInfo;
NV0000_CTRL_NVD_GET_NVLOG_BUFFER_INFO_PARAMS getNvlogBufferInfo;
NV0000_CTRL_NVD_GET_NVLOG_PARAMS getNvlog;
} params;
} NV_NVLOG_CTRL_PARAMS;
#endif
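Not part of the change: a minimal sketch of how a caller might fill the new structure before issuing the NV_ESC_RM_NVLOG_CTRL (0x5F) escape on the control device; the escape plumbing itself is omitted.
NV_NVLOG_CTRL_PARAMS params = {0};
params.ctrl = NV0000_CTRL_CMD_NVD_GET_NVLOG_INFO;   // select the sub-command
/* ... issue the escape, then check params.status and params.params.getNvlogInfo ... */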

View File

@ -510,6 +510,12 @@ struct nv_file_private_t
nv_file_private_t *ctl_nvfp;
void *ctl_nvfp_priv;
NvU32 register_or_refcount;
//
// True if a client or an event was ever allocated on this fd.
// If false, RMAPI cleanup is skipped.
//
NvBool bCleanupRmapi;
};
// Forward define the gpu ops structures
@ -959,6 +965,8 @@ NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
void NV_API_CALL rm_request_dnotifier_state (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
NV_STATUS NV_API_CALL rm_p2p_destroy_mapping (nvidia_stack_t *, NvU64);

View File

@ -50,5 +50,6 @@
#define NV_ESC_RM_EXPORT_OBJECT_TO_FD 0x5C
#define NV_ESC_RM_IMPORT_OBJECT_FROM_FD 0x5D
#define NV_ESC_RM_UPDATE_DEVICE_MAPPING_INFO 0x5E
#define NV_ESC_RM_NVLOG_CTRL 0x5F
#endif // NV_ESCAPE_H_INCLUDED

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -130,6 +130,8 @@ NV_STATUS RmInitX86EmuState(OBJGPU *);
void RmFreeX86EmuState(OBJGPU *);
NV_STATUS RmPowerSourceChangeEvent(nv_state_t *, NvU32);
void RmRequestDNotifierState(nv_state_t *);
const NvU8 *RmGetGpuUuidRaw(nv_state_t *);
NV_STATUS nv_vbios_call(OBJGPU *, NvU32 *, NvU32 *);

View File

@ -2177,6 +2177,7 @@ RmPowerManagementInternal(
//
RmPowerSourceChangeEvent(nv, !ac_plugged);
}
RmRequestDNotifierState(nv);
}
break;

View File

@ -46,6 +46,10 @@
#include <class/cl003e.h> // NV01_MEMORY_SYSTEM
#include <class/cl0071.h> // NV01_MEMORY_SYSTEM_OS_DESCRIPTOR
#include "rmapi/client_resource.h"
#include "nvlog/nvlog.h"
#include <nv-ioctl-nvlog.h>
#include <ctrl/ctrl00fd.h>
#define NV_CTL_DEVICE_ONLY(nv) \
@ -839,6 +843,40 @@ NV_STATUS RmIoctl(
break;
}
case NV_ESC_RM_NVLOG_CTRL:
{
NV_NVLOG_CTRL_PARAMS *pParams = data;
NV_CTL_DEVICE_ONLY(nv);
if (!osIsAdministrator())
{
rmStatus = NV_ERR_INSUFFICIENT_PERMISSIONS;
pParams->status = rmStatus;
goto done;
}
switch (pParams->ctrl)
{
// Do not use NVOC _DISPATCH here as it dereferences NULL RmClientResource*
case NV0000_CTRL_CMD_NVD_GET_NVLOG_INFO:
rmStatus = cliresCtrlCmdNvdGetNvlogInfo_IMPL(NULL, &pParams->params.getNvlogInfo);
break;
case NV0000_CTRL_CMD_NVD_GET_NVLOG_BUFFER_INFO:
rmStatus = cliresCtrlCmdNvdGetNvlogBufferInfo_IMPL(NULL, &pParams->params.getNvlogBufferInfo);
break;
case NV0000_CTRL_CMD_NVD_GET_NVLOG:
rmStatus = cliresCtrlCmdNvdGetNvlog_IMPL(NULL, &pParams->params.getNvlog);
break;
default:
rmStatus = NV_ERR_NOT_SUPPORTED;
break;
}
pParams->status = rmStatus;
goto done;
}
case NV_ESC_REGISTER_FD:
{
nv_ioctl_register_fd_t *params = data;

View File

@ -5415,6 +5415,14 @@ osDmabufIsSupported(void)
return os_dma_buf_enabled;
}
void osAllocatedRmClient(void *pOsInfo)
{
nv_file_private_t* nvfp = (nv_file_private_t*)pOsInfo;
if (nvfp != NULL)
nvfp->bCleanupRmapi = NV_TRUE;
}
NV_STATUS
osGetEgmInfo
(

View File

@ -31,6 +31,7 @@
#include <class/cl0000.h>
#include <rmosxfac.h> // Declares RmInitRm().
#include "gpu/gpu.h"
#include "gps.h"
#include <osfuncs.h>
#include <platform/chipset/chipset.h>
@ -86,6 +87,13 @@
#include "gpu/bus/kern_bus.h"
//
// If a timer callback arrives while PM resume is in progress, it can't be
// serviced and the timer needs to be rescheduled. This value controls how
// long the reschedule is deferred.
//
#define TIMER_RESCHED_TIME_DURING_PM_RESUME_NS (100 * 1000 * 1000)
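For reference, a quick check of the constant above:
// 100 * 1000 * 1000 ns = 100,000,000 ns = 100 ms reschedule delay
NvU64 reschedMs = TIMER_RESCHED_TIME_DURING_PM_RESUME_NS / 1000000;   // 100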
//
// Helper function which can be called before doing any RM control
// This function:
@ -499,6 +507,8 @@ done:
new_event->active = NV_TRUE;
new_event->refcount = 0;
nvfp->bCleanupRmapi = NV_TRUE;
NV_PRINTF(LEVEL_INFO, "allocated OS event:\n");
NV_PRINTF(LEVEL_INFO, " hParent: 0x%x\n", hParent);
NV_PRINTF(LEVEL_INFO, " fd: %d\n", fd);
@ -1158,12 +1168,47 @@ NV_STATUS RmPowerSourceChangeEvent(
&params, sizeof(params));
}
/*!
* @brief Function to request the latest D-Notifier status from the SBIOS.
*
* Handles scenarios (such as a fresh boot or a suspend/resume of the
* system) in which RM is not available to receive the Dx notifiers.
* This function fetches the latest D-Notifier status from the SBIOS
* once RM is ready to receive and handle those events, using the
* GPS_FUNC_REQUESTDXSTATE subfunction to query the current Dx state.
*
* @param[in] pNv nv_state_t pointer.
*/
void RmRequestDNotifierState(
nv_state_t *pNv
)
{
OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
NvU32 supportedFuncs = 0;
NvU16 dsmDataSize = sizeof(supportedFuncs);
NV_STATUS status = NV_OK;
status = osCallACPI_DSM(pGpu, ACPI_DSM_FUNCTION_GPS_2X,
GPS_FUNC_REQUESTDXSTATE, &supportedFuncs,
&dsmDataSize);
if (status != NV_OK)
{
//
// The call to the 'GPS_FUNC_REQUESTDXSTATE' subfunction may fail if the
// SBIOS/EC does not have the corresponding implementation.
//
NV_PRINTF(LEVEL_INFO,
"%s: Failed to request Dx event update, status 0x%x\n",
__FUNCTION__, status);
}
}
/*!
* @brief Deal with D-notifier events to apply a performance
* level based on the requested auxiliary power-state.
* Read confluence page "D-Notifiers on Linux" for more details.
*
* @param[in] pGpu OBJGPU pointer.
* @param[in] pNv nv_state_t pointer.
* @param[in] event_type NvU32 Event type.
*/
static void RmHandleDNotifierEvent(
@ -2551,6 +2596,16 @@ void NV_API_CALL rm_cleanup_file_private(
OBJSYS *pSys = SYS_GET_INSTANCE();
NV_ENTER_RM_RUNTIME(sp,fp);
//
// Skip cleaning up this fd if:
// - no RMAPI clients and events were ever allocated on this fd
// - no RMAPI object handles were exported on this fd
// Access nvfp->handles without locking as fd cleanup is synchronised by the kernel
//
if (!nvfp->bCleanupRmapi && nvfp->handles == NULL)
goto done;
pRmApi = rmapiGetInterface(RMAPI_EXTERNAL);
threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
threadStateSetTimeoutOverride(&threadState, 10 * 1000);
@ -2600,6 +2655,7 @@ void NV_API_CALL rm_cleanup_file_private(
rmapiEpilogue(pRmApi, &rmApiContext);
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
done:
if (nvfp->ctl_nvfp != NULL)
{
nv_put_file_private(nvfp->ctl_nvfp_priv);
@ -3018,14 +3074,16 @@ static NV_STATUS RmRunNanoTimerCallback(
if ((status = rmGpuLocksAcquire(GPU_LOCK_FLAGS_COND_ACQUIRE, RM_LOCK_MODULES_TMR)) != NV_OK)
{
TMR_EVENT *pEvent = (TMR_EVENT *)pTmrEvent;
NvU64 timeNs = pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) ?
TIMER_RESCHED_TIME_DURING_PM_RESUME_NS :
osGetTickResolution();
//
// We failed to acquire the lock - depending on what's holding it,
// the lock could be held for a while, so try again soon, but not too
// soon to prevent the owner from making forward progress indefinitely.
//
return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->pOSTmrCBdata,
osGetTickResolution());
return osStartNanoTimer(pGpu->pOsGpuInfo, pEvent->pOSTmrCBdata, timeNs);
}
threadStateInitISRAndDeferredIntHandler(&threadState, pGpu,
@ -3062,7 +3120,7 @@ NV_STATUS NV_API_CALL rm_run_nano_timer_callback
if (pGpu == NULL)
return NV_ERR_GENERIC;
if (!FULL_GPU_SANITY_CHECK(pGpu))
if (!FULL_GPU_SANITY_FOR_PM_RESUME(pGpu))
{
return NV_ERR_GENERIC;
}
@ -4059,6 +4117,48 @@ void NV_API_CALL rm_power_source_change_event(
NV_EXIT_RM_RUNTIME(sp,fp);
}
void NV_API_CALL rm_request_dnotifier_state(
nv_stack_t *sp,
nv_state_t *pNv
)
{
nv_priv_t *nvp = NV_GET_NV_PRIV(pNv);
if (nvp->b_mobile_config_enabled)
{
THREAD_STATE_NODE threadState;
void *fp;
GPU_MASK gpuMask;
NV_ENTER_RM_RUNTIME(sp,fp);
threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
// LOCK: acquire API lock
if ((rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_ACPI)) == NV_OK)
{
OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
// LOCK: acquire per device lock
if ((pGpu != NULL) &&
((rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_ACPI,
&gpuMask)) == NV_OK))
{
RmRequestDNotifierState(pNv);
// UNLOCK: release per device lock
rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
}
// UNLOCK: release API lock
rmapiLockRelease();
}
threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
NV_EXIT_RM_RUNTIME(sp,fp);
}
}
NV_STATUS NV_API_CALL rm_p2p_dma_map_pages(
nvidia_stack_t *sp,
nv_dma_device_t *peer,

View File

@ -72,6 +72,7 @@ osCreateMemFromOsDescriptor
void *pPrivate;
pClient = serverutilGetClientUnderLock(hClient);
if ((pDescriptor == NvP64_NULL) ||
(*pLimit == 0) ||
(pClient == NULL))
@ -362,6 +363,23 @@ osCheckGpuBarsOverlapAddrRange
return NV_OK;
}
static NvU64
_doWarBug4040336
(
OBJGPU *pGpu,
NvU64 addr
)
{
if (gpuIsWarBug4040336Enabled(pGpu))
{
if ((addr & 0xffffffff00000000ULL) == 0x7fff00000000ULL)
{
addr = addr & 0xffffffffULL;
}
}
return addr;
}
static NV_STATUS
osCreateOsDescriptorFromIoMemory
(
@ -440,6 +458,14 @@ osCreateOsDescriptorFromIoMemory
return rmStatus;
}
//
// BF3's PCIe MMIO bus address at 0x800000000000 (CPU PA 0x7fff00000000) is
// too high for Ampere to address. As a result, BF3's bus address is
// moved to < 4GB. The CPU PA and the bus address are therefore no longer 1:1
// and need to be adjusted.
//
*base = _doWarBug4040336(pGpu, *base);
rmStatus = memdescCreate(ppMemDesc, pGpu, (*pLimit + 1), 0,
NV_MEMORY_CONTIGUOUS, ADDR_SYSMEM,
NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
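Not part of the change: a self-contained arithmetic check of the masking applied by _doWarBug4040336, using a hypothetical BF3 CPU physical address.
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t addr = 0x7fff12345678ULL;                        /* hypothetical CPU PA */
    if ((addr & 0xffffffff00000000ULL) == 0x7fff00000000ULL)  /* same test as the WAR */
        addr &= 0xffffffffULL;                                /* keep only the low 32 bits */
    printf("0x%llx\n", (unsigned long long)addr);             /* prints 0x12345678 */
    return 0;
}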

View File

@ -869,30 +869,6 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_context_clear(nvidia_stack_t *sp,
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_acquire_encryption_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 *encryptIv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslAcquireEncryptionIv(ctx, encryptIv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_log_device_encryption(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 *decryptIv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslLogDeviceEncryption(ctx, decryptIv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_rotate_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 direction)
@ -942,12 +918,15 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_decrypt(nvidia_stack_t *sp,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagData)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsCcslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, outputBuffer, authTagData);
rmStatus = nvGpuOpsCcslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, outputBuffer,
addAuthData, addAuthDataSize, authTagData);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
@ -979,3 +958,17 @@ NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_query_message_pool(nvidia_stack_t *sp,
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}
NV_STATUS NV_API_CALL rm_gpu_ops_ccsl_increment_iv(nvidia_stack_t *sp,
struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv)
{
NV_STATUS rmStatus;
void *fp;
NV_ENTER_RM_RUNTIME(sp,fp);
rmStatus = nvGpuOpsIncrementIv(ctx, direction, increment, iv);
NV_EXIT_RM_RUNTIME(sp,fp);
return rmStatus;
}

View File

@ -41,6 +41,7 @@
--undefined=rm_release_all_gpus_lock
--undefined=rm_shutdown_rm
--undefined=rm_power_source_change_event
--undefined=rm_request_dnotifier_state
--undefined=rm_write_registry_binary
--undefined=rm_write_registry_dword
--undefined=rm_write_registry_string
@ -121,16 +122,15 @@
--undefined=rm_gpu_ops_paging_channel_push_stream
--undefined=rm_gpu_ops_device_create
--undefined=rm_gpu_ops_device_destroy
--undefined=rm_gpu_ops_ccsl_acquire_encryption_iv
--undefined=rm_gpu_ops_ccsl_sign
--undefined=rm_gpu_ops_ccsl_encrypt
--undefined=rm_gpu_ops_ccsl_encrypt_with_iv
--undefined=rm_gpu_ops_ccsl_context_init
--undefined=rm_gpu_ops_ccsl_context_clear
--undefined=rm_gpu_ops_ccsl_log_device_encryption
--undefined=rm_gpu_ops_ccsl_rotate_iv
--undefined=rm_gpu_ops_ccsl_decrypt
--undefined=rm_gpu_ops_ccsl_query_message_pool
--undefined=rm_gpu_ops_ccsl_increment_iv
--undefined=rm_log_gpu_crash
--undefined=rm_kernel_rmapi_op
--undefined=nv_get_hypervisor_type

View File

@ -0,0 +1,88 @@
#define NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#include "nvoc/runtime.h"
#include "nvoc/rtti.h"
#include "nvtypes.h"
#include "nvport/nvport.h"
#include "nvport/inline/util_valist.h"
#include "utils/nvassert.h"
#include "g_ccsl_nvoc.h"
#ifdef DEBUG
char __nvoc_class_id_uniqueness_check_0x9bf1a1 = 1;
#endif
extern const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl;
void __nvoc_init_Ccsl(Ccsl*);
void __nvoc_init_funcTable_Ccsl(Ccsl*);
NV_STATUS __nvoc_ctor_Ccsl(Ccsl*);
void __nvoc_init_dataField_Ccsl(Ccsl*);
void __nvoc_dtor_Ccsl(Ccsl*);
extern const struct NVOC_EXPORT_INFO __nvoc_export_info_Ccsl;
static const struct NVOC_RTTI __nvoc_rtti_Ccsl_Ccsl = {
/*pClassDef=*/ &__nvoc_class_def_Ccsl,
/*dtor=*/ (NVOC_DYNAMIC_DTOR) &__nvoc_dtor_Ccsl,
/*offset=*/ 0,
};
static const struct NVOC_CASTINFO __nvoc_castinfo_Ccsl = {
/*numRelatives=*/ 1,
/*relatives=*/ {
&__nvoc_rtti_Ccsl_Ccsl,
},
};
// Not instantiable because it's not derived from class "Object"
const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl =
{
/*classInfo=*/ {
/*size=*/ sizeof(Ccsl),
/*classId=*/ classId(Ccsl),
/*providerId=*/ &__nvoc_rtti_provider,
#if NV_PRINTF_STRINGS_ALLOWED
/*name=*/ "Ccsl",
#endif
},
/*objCreatefn=*/ (NVOC_DYNAMIC_OBJ_CREATE) NULL,
/*pCastInfo=*/ &__nvoc_castinfo_Ccsl,
/*pExportInfo=*/ &__nvoc_export_info_Ccsl
};
const struct NVOC_EXPORT_INFO __nvoc_export_info_Ccsl =
{
/*numEntries=*/ 0,
/*pExportEntries=*/ 0
};
void __nvoc_dtor_Ccsl(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
void __nvoc_init_dataField_Ccsl(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
NV_STATUS __nvoc_ctor_Ccsl(Ccsl *pThis) {
NV_STATUS status = NV_OK;
__nvoc_init_dataField_Ccsl(pThis);
goto __nvoc_ctor_Ccsl_exit; // Success
__nvoc_ctor_Ccsl_exit:
return status;
}
static void __nvoc_init_funcTable_Ccsl_1(Ccsl *pThis) {
PORT_UNREFERENCED_VARIABLE(pThis);
}
void __nvoc_init_funcTable_Ccsl(Ccsl *pThis) {
__nvoc_init_funcTable_Ccsl_1(pThis);
}
void __nvoc_init_Ccsl(Ccsl *pThis) {
pThis->__nvoc_pbase_Ccsl = pThis;
__nvoc_init_funcTable_Ccsl(pThis);
}

View File

@ -0,0 +1,189 @@
#ifndef _G_CCSL_NVOC_H_
#define _G_CCSL_NVOC_H_
#include "nvoc/runtime.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "g_ccsl_nvoc.h"
#ifndef CCSL_H
#define CCSL_H
#include "nvstatus.h"
#include "nvmisc.h"
#include "kernel/gpu/conf_compute/conf_compute.h"
#define CCSL_DIR_HOST_TO_DEVICE 0
#define CCSL_DIR_DEVICE_TO_HOST 1
struct ccslContext_t
{
NvHandle hClient;
NvHandle hChannel;
enum {CSL_MSG_CTR_32, CSL_MSG_CTR_64} msgCounterSize;
NvU8 keyIn[CC_AES_256_GCM_KEY_SIZE_BYTES];
union
{
struct
{
NvU8 ivIn[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskIn[CC_AES_256_GCM_IV_SIZE_BYTES];
};
NvU8 nonce[CC_HMAC_NONCE_SIZE_BYTES];
};
NvU8 keyOut[CC_AES_256_GCM_KEY_SIZE_BYTES];
NvU8 ivOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU64 keyHandleIn;
NvU64 keyHandleOut;
};
typedef struct ccslContext_t *pCcslContext;
/****************************************************************************\
* *
* CCSL module header. *
* *
****************************************************************************/
#ifdef NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#define PRIVATE_FIELD(x) x
#else
#define PRIVATE_FIELD(x) NVOC_PRIVATE_FIELD(x)
#endif
struct Ccsl {
const struct NVOC_RTTI *__nvoc_rtti;
struct Ccsl *__nvoc_pbase_Ccsl;
};
#ifndef __NVOC_CLASS_Ccsl_TYPEDEF__
#define __NVOC_CLASS_Ccsl_TYPEDEF__
typedef struct Ccsl Ccsl;
#endif /* __NVOC_CLASS_Ccsl_TYPEDEF__ */
#ifndef __nvoc_class_id_Ccsl
#define __nvoc_class_id_Ccsl 0x9bf1a1
#endif /* __nvoc_class_id_Ccsl */
extern const struct NVOC_CLASS_DEF __nvoc_class_def_Ccsl;
#define __staticCast_Ccsl(pThis) \
((pThis)->__nvoc_pbase_Ccsl)
#ifdef __nvoc_ccsl_h_disabled
#define __dynamicCast_Ccsl(pThis) ((Ccsl*)NULL)
#else //__nvoc_ccsl_h_disabled
#define __dynamicCast_Ccsl(pThis) \
((Ccsl*)__nvoc_dynamicCast(staticCast((pThis), Dynamic), classInfo(Ccsl)))
#endif //__nvoc_ccsl_h_disabled
NV_STATUS __nvoc_objCreateDynamic_Ccsl(Ccsl**, Dynamic*, NvU32, va_list);
NV_STATUS __nvoc_objCreate_Ccsl(Ccsl**, Dynamic*, NvU32);
#define __objCreate_Ccsl(ppNewObj, pParent, createFlags) \
__nvoc_objCreate_Ccsl((ppNewObj), staticCast((pParent), Dynamic), (createFlags))
NV_STATUS ccslContextInitViaChannel_IMPL(pCcslContext *ppCtx, NvHandle hClient, NvHandle hChannel);
#define ccslContextInitViaChannel(ppCtx, hClient, hChannel) ccslContextInitViaChannel_IMPL(ppCtx, hClient, hChannel)
#define ccslContextInitViaChannel_HAL(ppCtx, hClient, hChannel) ccslContextInitViaChannel(ppCtx, hClient, hChannel)
NV_STATUS ccslContextInitViaKeyId_KERNEL(struct ConfidentialCompute *pConfCompute, pCcslContext *ppCtx, NvU32 globalKeyId);
#define ccslContextInitViaKeyId(pConfCompute, ppCtx, globalKeyId) ccslContextInitViaKeyId_KERNEL(pConfCompute, ppCtx, globalKeyId)
#define ccslContextInitViaKeyId_HAL(pConfCompute, ppCtx, globalKeyId) ccslContextInitViaKeyId(pConfCompute, ppCtx, globalKeyId)
NV_STATUS ccslRotateIv_IMPL(pCcslContext ctx, NvU8 direction);
#define ccslRotateIv(ctx, direction) ccslRotateIv_IMPL(ctx, direction)
#define ccslRotateIv_HAL(ctx, direction) ccslRotateIv(ctx, direction)
NV_STATUS ccslEncryptWithIv_IMPL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, NvU8 *encryptIv, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, NvU8 *authTagBuffer);
#define ccslEncryptWithIv(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncryptWithIv_IMPL(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslEncryptWithIv_HAL(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncryptWithIv(ctx, bufferSize, inputBuffer, encryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslEncrypt_KERNEL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, NvU8 *authTagBuffer);
#define ccslEncrypt(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncrypt_KERNEL(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslEncrypt_HAL(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslEncrypt(ctx, bufferSize, inputBuffer, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslDecrypt_KERNEL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, const NvU8 *decryptIv, const NvU8 *aadBuffer, NvU32 aadSize, NvU8 *outputBuffer, const NvU8 *authTagBuffer);
#define ccslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslDecrypt_KERNEL(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
#define ccslDecrypt_HAL(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer) ccslDecrypt(ctx, bufferSize, inputBuffer, decryptIv, aadBuffer, aadSize, outputBuffer, authTagBuffer)
NV_STATUS ccslSign_IMPL(pCcslContext ctx, NvU32 bufferSize, const NvU8 *inputBuffer, NvU8 *authTagBuffer);
#define ccslSign(ctx, bufferSize, inputBuffer, authTagBuffer) ccslSign_IMPL(ctx, bufferSize, inputBuffer, authTagBuffer)
#define ccslSign_HAL(ctx, bufferSize, inputBuffer, authTagBuffer) ccslSign(ctx, bufferSize, inputBuffer, authTagBuffer)
NV_STATUS ccslQueryMessagePool_IMPL(pCcslContext ctx, NvU8 direction, NvU64 *messageNum);
#define ccslQueryMessagePool(ctx, direction, messageNum) ccslQueryMessagePool_IMPL(ctx, direction, messageNum)
#define ccslQueryMessagePool_HAL(ctx, direction, messageNum) ccslQueryMessagePool(ctx, direction, messageNum)
NV_STATUS ccslIncrementIv_IMPL(pCcslContext pCtx, NvU8 direction, NvU64 increment, NvU8 *iv);
#define ccslIncrementIv(pCtx, direction, increment, iv) ccslIncrementIv_IMPL(pCtx, direction, increment, iv)
#define ccslIncrementIv_HAL(pCtx, direction, increment, iv) ccslIncrementIv(pCtx, direction, increment, iv)
void ccslContextClear_IMPL(pCcslContext ctx);
#define ccslContextClear(ctx) ccslContextClear_IMPL(ctx)
NV_STATUS ccslIncrementCounter_IMPL(pCcslContext pCtx, NvU8 *ctr, NvU64 increment);
#define ccslIncrementCounter(pCtx, ctr, increment) ccslIncrementCounter_IMPL(pCtx, ctr, increment)
#undef PRIVATE_FIELD
#ifndef NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#undef ccslIncrementCounter
NV_STATUS NVOC_PRIVATE_FUNCTION(ccslIncrementCounter)(pCcslContext pCtx, NvU8 *ctr, NvU64 increment);
#endif // NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#endif // CCSL_H
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _G_CCSL_NVOC_H_

View File

@ -17,7 +17,7 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_Object;
void __nvoc_init_CeUtils(CeUtils*);
void __nvoc_init_funcTable_CeUtils(CeUtils*);
NV_STATUS __nvoc_ctor_CeUtils(CeUtils*, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
NV_STATUS __nvoc_ctor_CeUtils(CeUtils*, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
void __nvoc_init_dataField_CeUtils(CeUtils*);
void __nvoc_dtor_CeUtils(CeUtils*);
extern const struct NVOC_EXPORT_INFO __nvoc_export_info_CeUtils;
@ -75,13 +75,13 @@ void __nvoc_init_dataField_CeUtils(CeUtils *pThis) {
}
NV_STATUS __nvoc_ctor_Object(Object* );
NV_STATUS __nvoc_ctor_CeUtils(CeUtils *pThis, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS __nvoc_ctor_CeUtils(CeUtils *pThis, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS status = NV_OK;
status = __nvoc_ctor_Object(&pThis->__nvoc_base_Object);
if (status != NV_OK) goto __nvoc_ctor_CeUtils_fail_Object;
__nvoc_init_dataField_CeUtils(pThis);
status = __nvoc_ceutilsConstruct(pThis, arg_pGpu, arg_pAllocParams);
status = __nvoc_ceutilsConstruct(pThis, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
if (status != NV_OK) goto __nvoc_ctor_CeUtils_fail__init;
goto __nvoc_ctor_CeUtils_exit; // Success
@ -109,7 +109,7 @@ void __nvoc_init_CeUtils(CeUtils *pThis) {
__nvoc_init_funcTable_CeUtils(pThis);
}
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams) {
NV_STATUS status;
Object *pParentObj;
CeUtils *pThis;
@ -135,7 +135,7 @@ NV_STATUS __nvoc_objCreate_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 cre
}
__nvoc_init_CeUtils(pThis);
status = __nvoc_ctor_CeUtils(pThis, arg_pGpu, arg_pAllocParams);
status = __nvoc_ctor_CeUtils(pThis, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
if (status != NV_OK) goto __nvoc_objCreate_CeUtils_cleanup;
*ppThis = pThis;
@ -156,9 +156,10 @@ __nvoc_objCreate_CeUtils_cleanup:
NV_STATUS __nvoc_objCreateDynamic_CeUtils(CeUtils **ppThis, Dynamic *pParent, NvU32 createFlags, va_list args) {
NV_STATUS status;
OBJGPU * arg_pGpu = va_arg(args, OBJGPU *);
KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance = va_arg(args, KERNEL_MIG_GPU_INSTANCE *);
NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams = va_arg(args, NV0050_ALLOCATION_PARAMETERS *);
status = __nvoc_objCreate_CeUtils(ppThis, pParent, createFlags, arg_pGpu, arg_pAllocParams);
status = __nvoc_objCreate_CeUtils(ppThis, pParent, createFlags, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams);
return status;
}

View File

@ -76,7 +76,6 @@ struct CeUtils {
NvHandle hDevice;
NvHandle hSubdevice;
OBJCHANNEL *pChannel;
KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
OBJGPU *pGpu;
struct KernelCE *pKCe;
NvBool bUseVasForCeCopy;
@ -109,47 +108,16 @@ extern const struct NVOC_CLASS_DEF __nvoc_class_def_CeUtils;
NV_STATUS __nvoc_objCreateDynamic_CeUtils(CeUtils**, Dynamic*, NvU32, va_list);
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils**, Dynamic*, NvU32, OBJGPU * arg_pGpu, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
#define __objCreate_CeUtils(ppNewObj, pParent, createFlags, arg_pGpu, arg_pAllocParams) \
__nvoc_objCreate_CeUtils((ppNewObj), staticCast((pParent), Dynamic), (createFlags), arg_pGpu, arg_pAllocParams)
NV_STATUS __nvoc_objCreate_CeUtils(CeUtils**, Dynamic*, NvU32, OBJGPU * arg_pGpu, KERNEL_MIG_GPU_INSTANCE * arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS * arg_pAllocParams);
#define __objCreate_CeUtils(ppNewObj, pParent, createFlags, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams) \
__nvoc_objCreate_CeUtils((ppNewObj), staticCast((pParent), Dynamic), (createFlags), arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams)
NV_STATUS ceutilsConstruct_IMPL(struct CeUtils *arg_pCeUtils, OBJGPU *arg_pGpu, NV0050_ALLOCATION_PARAMETERS *arg_pAllocParams);
NV_STATUS ceutilsConstruct_IMPL(struct CeUtils *arg_pCeUtils, OBJGPU *arg_pGpu, KERNEL_MIG_GPU_INSTANCE *arg_pKernelMIGGPUInstance, NV0050_ALLOCATION_PARAMETERS *arg_pAllocParams);
#define __nvoc_ceutilsConstruct(arg_pCeUtils, arg_pGpu, arg_pAllocParams) ceutilsConstruct_IMPL(arg_pCeUtils, arg_pGpu, arg_pAllocParams)
#define __nvoc_ceutilsConstruct(arg_pCeUtils, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams) ceutilsConstruct_IMPL(arg_pCeUtils, arg_pGpu, arg_pKernelMIGGPUInstance, arg_pAllocParams)
void ceutilsDestruct_IMPL(struct CeUtils *pCeUtils);
#define __nvoc_ceutilsDestruct(pCeUtils) ceutilsDestruct_IMPL(pCeUtils)
NV_STATUS ceutilsInitialize_IMPL(struct CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
#ifdef __nvoc_ce_utils_h_disabled
static inline NV_STATUS ceutilsInitialize(struct CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsInitialize(pCeUtils, pGpu, pAllocParams) ceutilsInitialize_IMPL(pCeUtils, pGpu, pAllocParams)
#endif //__nvoc_ce_utils_h_disabled
void ceutilsDeinit_IMPL(struct CeUtils *pCeUtils);
#ifdef __nvoc_ce_utils_h_disabled
static inline void ceutilsDeinit(struct CeUtils *pCeUtils) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsDeinit(pCeUtils) ceutilsDeinit_IMPL(pCeUtils)
#endif //__nvoc_ce_utils_h_disabled
void ceutilsRegisterGPUInstance_IMPL(struct CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance);
#ifdef __nvoc_ce_utils_h_disabled
static inline void ceutilsRegisterGPUInstance(struct CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance) {
NV_ASSERT_FAILED_PRECOMP("CeUtils was disabled!");
}
#else //__nvoc_ce_utils_h_disabled
#define ceutilsRegisterGPUInstance(pCeUtils, pKernelMIGGPUInstance) ceutilsRegisterGPUInstance_IMPL(pCeUtils, pKernelMIGGPUInstance)
#endif //__nvoc_ce_utils_h_disabled
NV_STATUS ceutilsMemset_IMPL(struct CeUtils *pCeUtils, CEUTILS_MEMSET_PARAMS *pParams);
#ifdef __nvoc_ce_utils_h_disabled

View File

@ -367,6 +367,17 @@ void __nvoc_init_dataField_OBJGPU(OBJGPU *pThis) {
}
pThis->bIsGspOwnedFaultBuffersEnabled = ((NvBool)(0 != 0));
// Hal field -- bEnableBar1SparseForFillPteMemUnmap
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x11f0fc00UL) )) /* ChipHal: GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 | GH100 */
{
pThis->bEnableBar1SparseForFillPteMemUnmap = ((NvBool)(0 == 0));
}
// default
else
{
pThis->bEnableBar1SparseForFillPteMemUnmap = ((NvBool)(0 != 0));
}
}
NV_STATUS __nvoc_ctor_Object(Object* );

View File

@ -1139,6 +1139,7 @@ struct OBJGPU {
NvBool bStateUnloading;
NvBool bStateLoaded;
NvBool bFullyConstructed;
NvBool bBf3WarBug4040336Enabled;
NvBool bUnifiedMemorySpaceEnabled;
NvBool bSriovEnabled;
NvBool bWarBug200577889SriovHeavyEnabled;
@ -1186,6 +1187,7 @@ struct OBJGPU {
NvBool bRecheckSliSupportAtResume;
_GPU_SLI_PEER peer[2];
NvBool bIsGspOwnedFaultBuffersEnabled;
NvBool bEnableBar1SparseForFillPteMemUnmap;
_GPU_GC6_STATE gc6State;
};
@ -3284,6 +3286,10 @@ static inline NvBool gpuIsUnifiedMemorySpaceEnabled(struct OBJGPU *pGpu) {
return pGpu->bUnifiedMemorySpaceEnabled;
}
static inline NvBool gpuIsWarBug4040336Enabled(struct OBJGPU *pGpu) {
return pGpu->bBf3WarBug4040336Enabled;
}
static inline NvBool gpuIsSriovEnabled(struct OBJGPU *pGpu) {
return pGpu->bSriovEnabled;
}

View File

@ -473,6 +473,16 @@ static void __nvoc_init_funcTable_KernelBif_1(KernelBif *pThis, RmHalspecOwner *
pThis->__kbifPreOsGlobalErotGrantRequest__ = &kbifPreOsGlobalErotGrantRequest_56cd7a;
}
// Hal function -- kbifCacheVFInfo
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_GH100;
}
else
{
pThis->__kbifCacheVFInfo__ = &kbifCacheVFInfo_TU102;
}
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelBif_engstateConstructEngine;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelBif_engstateStateInitLocked;

View File

@ -115,6 +115,7 @@ struct KernelBif {
NV_STATUS (*__kbifGetPciConfigSpacePriMirror__)(struct OBJGPU *, struct KernelBif *, NvU32 *, NvU32 *);
NV_STATUS (*__kbifGetBusOptionsAddr__)(struct OBJGPU *, struct KernelBif *, BUS_OPTIONS, NvU32 *);
NV_STATUS (*__kbifPreOsGlobalErotGrantRequest__)(struct OBJGPU *, struct KernelBif *);
void (*__kbifCacheVFInfo__)(struct OBJGPU *, struct KernelBif *);
NV_STATUS (*__kbifStatePreLoad__)(POBJGPU, struct KernelBif *, NvU32);
NV_STATUS (*__kbifStatePostUnload__)(POBJGPU, struct KernelBif *, NvU32);
void (*__kbifStateDestroy__)(POBJGPU, struct KernelBif *);
@ -263,6 +264,8 @@ NV_STATUS __nvoc_objCreate_KernelBif(KernelBif**, Dynamic*, NvU32);
#define kbifGetBusOptionsAddr_HAL(pGpu, pKernelBif, options, addrReg) kbifGetBusOptionsAddr_DISPATCH(pGpu, pKernelBif, options, addrReg)
#define kbifPreOsGlobalErotGrantRequest(pGpu, pKernelBif) kbifPreOsGlobalErotGrantRequest_DISPATCH(pGpu, pKernelBif)
#define kbifPreOsGlobalErotGrantRequest_HAL(pGpu, pKernelBif) kbifPreOsGlobalErotGrantRequest_DISPATCH(pGpu, pKernelBif)
#define kbifCacheVFInfo(pGpu, pKernelBif) kbifCacheVFInfo_DISPATCH(pGpu, pKernelBif)
#define kbifCacheVFInfo_HAL(pGpu, pKernelBif) kbifCacheVFInfo_DISPATCH(pGpu, pKernelBif)
#define kbifStatePreLoad(pGpu, pEngstate, arg0) kbifStatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
#define kbifStatePostUnload(pGpu, pEngstate, arg0) kbifStatePostUnload_DISPATCH(pGpu, pEngstate, arg0)
#define kbifStateDestroy(pGpu, pEngstate) kbifStateDestroy_DISPATCH(pGpu, pEngstate)
@ -606,6 +609,14 @@ static inline NV_STATUS kbifPreOsGlobalErotGrantRequest_DISPATCH(struct OBJGPU *
return pKernelBif->__kbifPreOsGlobalErotGrantRequest__(pGpu, pKernelBif);
}
void kbifCacheVFInfo_TU102(struct OBJGPU *pGpu, struct KernelBif *pKernelBif);
void kbifCacheVFInfo_GH100(struct OBJGPU *pGpu, struct KernelBif *pKernelBif);
static inline void kbifCacheVFInfo_DISPATCH(struct OBJGPU *pGpu, struct KernelBif *pKernelBif) {
pKernelBif->__kbifCacheVFInfo__(pGpu, pKernelBif);
}
static inline NV_STATUS kbifStatePreLoad_DISPATCH(POBJGPU pGpu, struct KernelBif *pEngstate, NvU32 arg0) {
return pEngstate->__kbifStatePreLoad__(pGpu, pEngstate, arg0);
}

View File

@ -312,6 +312,7 @@ struct KernelGsp {
struct MESSAGE_QUEUE_COLLECTION *pMQCollection;
struct OBJRPC *pRpc;
struct OBJRPC *pLocklessRpc;
char vbiosVersionStr[16];
KernelGspFlcnUcode *pFwsecUcode;
KernelGspFlcnUcode *pScrubberUcode;
KernelGspFlcnUcode *pBooterLoadUcode;
@ -1172,15 +1173,15 @@ static inline NV_STATUS kgspExecuteSequencerBuffer(struct OBJGPU *pGpu, struct K
#define kgspExecuteSequencerBuffer(pGpu, pKernelGsp, pRunCpuSeqParams) kgspExecuteSequencerBuffer_IMPL(pGpu, pKernelGsp, pRunCpuSeqParams)
#endif //__nvoc_kernel_gsp_h_disabled
NV_STATUS kgspParseFwsecUcodeFromVbiosImg_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode);
NV_STATUS kgspParseFwsecUcodeFromVbiosImg_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode, NvU64 *pVbiosVersionCombined);
#ifdef __nvoc_kernel_gsp_h_disabled
static inline NV_STATUS kgspParseFwsecUcodeFromVbiosImg(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode) {
static inline NV_STATUS kgspParseFwsecUcodeFromVbiosImg(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, const KernelGspVbiosImg *const pVbiosImg, KernelGspFlcnUcode **ppFwsecUcode, NvU64 *pVbiosVersionCombined) {
NV_ASSERT_FAILED_PRECOMP("KernelGsp was disabled!");
return NV_ERR_NOT_SUPPORTED;
}
#else //__nvoc_kernel_gsp_h_disabled
#define kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode) kgspParseFwsecUcodeFromVbiosImg_IMPL(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode)
#define kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode, pVbiosVersionCombined) kgspParseFwsecUcodeFromVbiosImg_IMPL(pGpu, pKernelGsp, pVbiosImg, ppFwsecUcode, pVbiosVersionCombined)
#endif //__nvoc_kernel_gsp_h_disabled
NV_STATUS kgspAllocateScrubberUcodeImage_IMPL(struct OBJGPU *pGpu, struct KernelGsp *pKernelGsp, KernelGspFlcnUcode **ppScrubberUcode);

View File

@ -100,8 +100,8 @@ static NV_STATUS __nvoc_thunk_KernelSec2_kflcnResetHw(struct OBJGPU *pGpu, struc
return ksec2ResetHw(pGpu, (struct KernelSec2 *)(((unsigned char *)pKernelSec2) - __nvoc_rtti_KernelSec2_KernelFalcon.offset));
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_ksec2StateLoad(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return engstateStateLoad(pGpu, (struct OBJENGSTATE *)(((unsigned char *)pEngstate) + __nvoc_rtti_KernelSec2_OBJENGSTATE.offset), arg0);
static NV_STATUS __nvoc_thunk_KernelSec2_engstateStateLoad(struct OBJGPU *pGpu, struct OBJENGSTATE *pKernelSec2, NvU32 arg0) {
return ksec2StateLoad(pGpu, (struct KernelSec2 *)(((unsigned char *)pKernelSec2) - __nvoc_rtti_KernelSec2_OBJENGSTATE.offset), arg0);
}
static NV_STATUS __nvoc_thunk_OBJENGSTATE_ksec2StateUnload(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
@ -252,6 +252,17 @@ static void __nvoc_init_funcTable_KernelSec2_1(KernelSec2 *pThis, RmHalspecOwner
// Hal function -- ksec2ResetHw
pThis->__ksec2ResetHw__ = &ksec2ResetHw_TU102;
// Hal function -- ksec2StateLoad
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
{
pThis->__ksec2StateLoad__ = &ksec2StateLoad_GH100;
}
// default
else
{
pThis->__ksec2StateLoad__ = &ksec2StateLoad_56cd7a;
}
// Hal function -- ksec2ReadUcodeFuseVersion
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */
{
@ -300,7 +311,7 @@ static void __nvoc_init_funcTable_KernelSec2_1(KernelSec2 *pThis, RmHalspecOwner
pThis->__nvoc_base_KernelFalcon.__kflcnResetHw__ = &__nvoc_thunk_KernelSec2_kflcnResetHw;
pThis->__ksec2StateLoad__ = &__nvoc_thunk_OBJENGSTATE_ksec2StateLoad;
pThis->__nvoc_base_OBJENGSTATE.__engstateStateLoad__ = &__nvoc_thunk_KernelSec2_engstateStateLoad;
pThis->__ksec2StateUnload__ = &__nvoc_thunk_OBJENGSTATE_ksec2StateUnload;

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -64,11 +64,11 @@ struct KernelSec2 {
NV_STATUS (*__ksec2ServiceNotificationInterrupt__)(struct OBJGPU *, struct KernelSec2 *, IntrServiceServiceNotificationInterruptArguments *);
void (*__ksec2ConfigureFalcon__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2ResetHw__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2StateLoad__)(struct OBJGPU *, struct KernelSec2 *, NvU32);
NvU32 (*__ksec2ReadUcodeFuseVersion__)(struct OBJGPU *, struct KernelSec2 *, NvU32);
const BINDATA_ARCHIVE *(*__ksec2GetBinArchiveBlUcode__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2GetGenericBlUcode__)(struct OBJGPU *, struct KernelSec2 *, const RM_FLCN_BL_DESC **, const NvU8 **);
const BINDATA_ARCHIVE *(*__ksec2GetBinArchiveSecurescrubUcode__)(struct OBJGPU *, struct KernelSec2 *);
NV_STATUS (*__ksec2StateLoad__)(POBJGPU, struct KernelSec2 *, NvU32);
NV_STATUS (*__ksec2StateUnload__)(POBJGPU, struct KernelSec2 *, NvU32);
NV_STATUS (*__ksec2StateInitLocked__)(POBJGPU, struct KernelSec2 *);
NV_STATUS (*__ksec2StatePreLoad__)(POBJGPU, struct KernelSec2 *, NvU32);
@ -127,6 +127,8 @@ NV_STATUS __nvoc_objCreate_KernelSec2(KernelSec2**, Dynamic*, NvU32);
#define ksec2ConfigureFalcon_HAL(pGpu, pKernelSec2) ksec2ConfigureFalcon_DISPATCH(pGpu, pKernelSec2)
#define ksec2ResetHw(pGpu, pKernelSec2) ksec2ResetHw_DISPATCH(pGpu, pKernelSec2)
#define ksec2ResetHw_HAL(pGpu, pKernelSec2) ksec2ResetHw_DISPATCH(pGpu, pKernelSec2)
#define ksec2StateLoad(pGpu, pKernelSec2, arg0) ksec2StateLoad_DISPATCH(pGpu, pKernelSec2, arg0)
#define ksec2StateLoad_HAL(pGpu, pKernelSec2, arg0) ksec2StateLoad_DISPATCH(pGpu, pKernelSec2, arg0)
#define ksec2ReadUcodeFuseVersion(pGpu, pKernelSec2, ucodeId) ksec2ReadUcodeFuseVersion_DISPATCH(pGpu, pKernelSec2, ucodeId)
#define ksec2ReadUcodeFuseVersion_HAL(pGpu, pKernelSec2, ucodeId) ksec2ReadUcodeFuseVersion_DISPATCH(pGpu, pKernelSec2, ucodeId)
#define ksec2GetBinArchiveBlUcode(pGpu, pKernelSec2) ksec2GetBinArchiveBlUcode_DISPATCH(pGpu, pKernelSec2)
@ -135,7 +137,6 @@ NV_STATUS __nvoc_objCreate_KernelSec2(KernelSec2**, Dynamic*, NvU32);
#define ksec2GetGenericBlUcode_HAL(pGpu, pKernelSec2, ppDesc, ppImg) ksec2GetGenericBlUcode_DISPATCH(pGpu, pKernelSec2, ppDesc, ppImg)
#define ksec2GetBinArchiveSecurescrubUcode(pGpu, pKernelSec2) ksec2GetBinArchiveSecurescrubUcode_DISPATCH(pGpu, pKernelSec2)
#define ksec2GetBinArchiveSecurescrubUcode_HAL(pGpu, pKernelSec2) ksec2GetBinArchiveSecurescrubUcode_DISPATCH(pGpu, pKernelSec2)
#define ksec2StateLoad(pGpu, pEngstate, arg0) ksec2StateLoad_DISPATCH(pGpu, pEngstate, arg0)
#define ksec2StateUnload(pGpu, pEngstate, arg0) ksec2StateUnload_DISPATCH(pGpu, pEngstate, arg0)
#define ksec2StateInitLocked(pGpu, pEngstate) ksec2StateInitLocked_DISPATCH(pGpu, pEngstate)
#define ksec2StatePreLoad(pGpu, pEngstate, arg0) ksec2StatePreLoad_DISPATCH(pGpu, pEngstate, arg0)
@ -184,6 +185,16 @@ static inline NV_STATUS ksec2ResetHw_DISPATCH(struct OBJGPU *pGpu, struct Kernel
return pKernelSec2->__ksec2ResetHw__(pGpu, pKernelSec2);
}
NV_STATUS ksec2StateLoad_GH100(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0);
static inline NV_STATUS ksec2StateLoad_56cd7a(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0) {
return NV_OK;
}
static inline NV_STATUS ksec2StateLoad_DISPATCH(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 arg0) {
return pKernelSec2->__ksec2StateLoad__(pGpu, pKernelSec2, arg0);
}
static inline NvU32 ksec2ReadUcodeFuseVersion_b2b553(struct OBJGPU *pGpu, struct KernelSec2 *pKernelSec2, NvU32 ucodeId) {
return 0;
}
@ -224,10 +235,6 @@ static inline const BINDATA_ARCHIVE *ksec2GetBinArchiveSecurescrubUcode_DISPATCH
return pKernelSec2->__ksec2GetBinArchiveSecurescrubUcode__(pGpu, pKernelSec2);
}
static inline NV_STATUS ksec2StateLoad_DISPATCH(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return pEngstate->__ksec2StateLoad__(pGpu, pEngstate, arg0);
}
static inline NV_STATUS ksec2StateUnload_DISPATCH(POBJGPU pGpu, struct KernelSec2 *pEngstate, NvU32 arg0) {
return pEngstate->__ksec2StateUnload__(pGpu, pEngstate, arg0);
}

View File

@ -326,9 +326,8 @@ kvgpumgrGetHostVgpuDeviceFromMdevUuid(NvU32 gpuPciId, const NvU8 *pMdevUuid,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice);
NV_STATUS
kvgpumgrGetHostVgpuDeviceFromVmId(NvU32 gpuPciId, VM_ID guestVmId,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice,
VM_ID_TYPE vmIdType);
kvgpumgrGetHostVgpuDeviceFromVgpuUuid(NvU32 gpuPciId, NvU8 *vgpuUuid,
KERNEL_HOST_VGPU_DEVICE **ppKernelHostVgpuDevice);
NV_STATUS
kvgpumgrGetCreatableVgpuTypes(struct OBJGPU *pGpu, struct KernelVgpuMgr *pKernelVgpuMgr, NvU32 pgpuIndex, NvU32* numVgpuTypes, NvU32* vgpuTypes);

View File

@ -233,6 +233,9 @@ typedef struct OBJCHANNEL
NvU64 pbGpuVA;
NvU64 pbGpuBitMapVA;
NvU64 pbGpuNotifierVA;
MEMORY_DESCRIPTOR *pUserdMemdesc;
MEMORY_DESCRIPTOR *pChannelBufferMemdesc;
MEMORY_DESCRIPTOR *pErrNotifierMemdesc;
NvU8 *pbCpuVA;
NvU8 *pbBitMapVA;
Nv906fControl *pControlGPFifo;
@ -2801,6 +2804,17 @@ static inline NV_STATUS memmgrInitSavedTopLevelScrubber(OBJGPU *arg0, struct Mem
#define memmgrInitSavedTopLevelScrubber(arg0, arg1) memmgrInitSavedTopLevelScrubber_IMPL(arg0, arg1)
#endif //__nvoc_mem_mgr_h_disabled
MEMORY_DESCRIPTOR *memmgrMemUtilsGetMemDescFromHandle_IMPL(struct MemoryManager *pMemoryManager, NvHandle hClient, NvHandle hMemory);
#ifdef __nvoc_mem_mgr_h_disabled
static inline MEMORY_DESCRIPTOR *memmgrMemUtilsGetMemDescFromHandle(struct MemoryManager *pMemoryManager, NvHandle hClient, NvHandle hMemory) {
NV_ASSERT_FAILED_PRECOMP("MemoryManager was disabled!");
return NULL;
}
#else //__nvoc_mem_mgr_h_disabled
#define memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, hClient, hMemory) memmgrMemUtilsGetMemDescFromHandle_IMPL(pMemoryManager, hClient, hMemory)
#endif //__nvoc_mem_mgr_h_disabled
NV_STATUS memmgrVerifyGspDmaOps_IMPL(OBJGPU *arg0, struct MemoryManager *arg1);
#ifdef __nvoc_mem_mgr_h_disabled

View File

@ -814,6 +814,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x20B5, 0x1642, 0x10de, "NVIDIA A100 80GB PCIe" },
{ 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
{ 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
{ 0x20B7, 0x1804, 0x10de, "NVIDIA A30" },
{ 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
{ 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
{ 0x20F3, 0x179c, 0x10de, "NVIDIA A800-SXM4-80GB" },
@ -1743,6 +1744,20 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2238, 0x16b8, 0x10DE, "NVIDIA A10M-10C" },
{ 0x2238, 0x16b9, 0x10DE, "NVIDIA A10M-20C" },
{ 0x2238, 0x16e6, 0x10DE, "NVIDIA A10M-1" },
{ 0x2321, 0x1853, 0x10DE, "NVIDIA H100L-1-12CME" },
{ 0x2321, 0x1854, 0x10DE, "NVIDIA H100L-1-12C" },
{ 0x2321, 0x1855, 0x10DE, "NVIDIA H100L-1-24C" },
{ 0x2321, 0x1856, 0x10DE, "NVIDIA H100L-2-24C" },
{ 0x2321, 0x1857, 0x10DE, "NVIDIA H100L-3-47C" },
{ 0x2321, 0x1858, 0x10DE, "NVIDIA H100L-4-47C" },
{ 0x2321, 0x1859, 0x10DE, "NVIDIA H100L-7-94C" },
{ 0x2321, 0x185a, 0x10DE, "NVIDIA H100L-4C" },
{ 0x2321, 0x185b, 0x10DE, "NVIDIA H100L-6C" },
{ 0x2321, 0x185c, 0x10DE, "NVIDIA H100L-11C" },
{ 0x2321, 0x185d, 0x10DE, "NVIDIA H100L-15C" },
{ 0x2321, 0x185e, 0x10DE, "NVIDIA H100L-23C" },
{ 0x2321, 0x185f, 0x10DE, "NVIDIA H100L-47C" },
{ 0x2321, 0x1860, 0x10DE, "NVIDIA H100L-94C" },
{ 0x2322, 0x17e2, 0x10DE, "NVIDIA H800-1-10CME" },
{ 0x2322, 0x17e3, 0x10DE, "NVIDIA H800-1-10C" },
{ 0x2322, 0x17e4, 0x10DE, "NVIDIA H800-2-20C" },
@ -1773,6 +1788,20 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x2331, 0x1798, 0x10DE, "NVIDIA H100-5C" },
{ 0x2331, 0x17f0, 0x10DE, "NVIDIA H100-1-10CME" },
{ 0x2331, 0x1844, 0x10DE, "NVIDIA H100-1-20C" },
{ 0x233A, 0x1861, 0x10DE, "NVIDIA H800L-1-12CME" },
{ 0x233A, 0x1862, 0x10DE, "NVIDIA H800L-1-12C" },
{ 0x233A, 0x1863, 0x10DE, "NVIDIA H800L-1-24C" },
{ 0x233A, 0x1864, 0x10DE, "NVIDIA H800L-2-24C" },
{ 0x233A, 0x1865, 0x10DE, "NVIDIA H800L-3-47C" },
{ 0x233A, 0x1866, 0x10DE, "NVIDIA H800L-4-47C" },
{ 0x233A, 0x1867, 0x10DE, "NVIDIA H800L-7-94C" },
{ 0x233A, 0x1868, 0x10DE, "NVIDIA H800L-4C" },
{ 0x233A, 0x1869, 0x10DE, "NVIDIA H800L-6C" },
{ 0x233A, 0x186a, 0x10DE, "NVIDIA H800L-11C" },
{ 0x233A, 0x186b, 0x10DE, "NVIDIA H800L-15C" },
{ 0x233A, 0x186c, 0x10DE, "NVIDIA H800L-23C" },
{ 0x233A, 0x186d, 0x10DE, "NVIDIA H800L-47C" },
{ 0x233A, 0x186e, 0x10DE, "NVIDIA H800L-94C" },
{ 0x25B6, 0x159d, 0x10DE, "NVIDIA A16-1B" },
{ 0x25B6, 0x159e, 0x10DE, "NVIDIA A16-2B" },
{ 0x25B6, 0x159f, 0x10DE, "NVIDIA A16-1Q" },

View File

@ -1213,6 +1213,8 @@ NV_STATUS osVerifySystemEnvironment(OBJGPU *pGpu);
NV_STATUS osSanityTestIsr(OBJGPU *pGpu);
void osAllocatedRmClient(void* pOSInfo);
NV_STATUS osConfigurePcieReqAtomics(OS_GPU_INFO *pOsGpuInfo, NvU32 *pMask);
NvBool osDmabufIsSupported(void);

File diff suppressed because it is too large

View File

@ -2402,6 +2402,40 @@ static void rpc_iGrp_ipVersions_Install_v23_03(IGRP_IP_VERSIONS_TABLE_INFO *pInf
#endif //
}
// No enabled chips use this variant provider
static void rpc_iGrp_ipVersions_Install_v23_04(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
{
#if 0
POBJGPU pGpu = pInfo->pGpu;
OBJRPC *pRpc = (OBJRPC *) pInfo->pDynamic;
RPC_HAL_IFACES *pRpcHal = &pRpc->_hal;
// avoid possible unused warnings
pGpu += 0;
pRpcHal += 0;
#endif //
}
// No enabled chips use this variant provider
static void rpc_iGrp_ipVersions_Install_v23_05(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
{
#if 0
POBJGPU pGpu = pInfo->pGpu;
OBJRPC *pRpc = (OBJRPC *) pInfo->pDynamic;
RPC_HAL_IFACES *pRpcHal = &pRpc->_hal;
// avoid possible unused warnings
pGpu += 0;
pRpcHal += 0;
#endif //
}
@ -2920,6 +2954,12 @@ static NV_STATUS rpc_iGrp_ipVersions_getInfo(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_03[] = {
{ 0x23030000, 0xFFFFFFFF, }, //
};
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_04[] = {
{ 0x23040000, 0xFFFFFFFF, }, //
};
static const IGRP_IP_VERSION_RANGE RPC_IGRP_IP_VERSIONS_RANGES_v23_05[] = {
{ 0x23050000, 0xFFFFFFFF, }, //
};
#define _RPC_HAL_IGRP_ENTRY_INIT(v) \
{ RPC_IGRP_IP_VERSIONS_RANGES_##v, NV_ARRAY_ELEMENTS(RPC_IGRP_IP_VERSIONS_RANGES_##v), rpc_iGrp_ipVersions_Install_##v, }
@ -3059,6 +3099,8 @@ static NV_STATUS rpc_iGrp_ipVersions_getInfo(IGRP_IP_VERSIONS_TABLE_INFO *pInfo)
_RPC_HAL_IGRP_ENTRY_INIT(v22_01), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_02), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_03), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_04), //
_RPC_HAL_IGRP_ENTRY_INIT(v23_05), //
};
#undef _RPC_HAL_IGRP_ENTRY_INIT

View File

@ -7,7 +7,7 @@ extern "C" {
#endif
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -322,6 +322,16 @@ typedef struct SYS_STATIC_CONFIG
NvBool bOsSevEnabled;
} SYS_STATIC_CONFIG;
typedef enum
{
CPU_VENDOR_UNKNOWN = 0,
CPU_VENDOR_INTEL,
CPU_VENDOR_AMD,
CPU_VENDOR_WINCHIP,
CPU_VENDOR_CYRIX,
CPU_VENDOR_TRANSM
} CPU_VENDOR;
typedef struct
{
NvBool bInitialized; // Set to true once we id the CPU
@ -340,6 +350,7 @@ typedef struct
// filled in if CPU has embedded name
NvU32 family; // Vendor defined Family/extended Family
NvU32 model; // Vendor defined Model/extended Model
NvU8 vendor; // Vendor CPU_VENDOR
NvU32 coresOnDie; // # of cores on the die (0 if unknown)
NvU32 platformID; // Chip package type
NvU8 stepping; // Silicon stepping

View File

@ -229,7 +229,7 @@
#define RMCFG_FEATURE_PLATFORM_GSP 0 // Running as part of GSP Firmware
#define RMCFG_FEATURE_PLATFORM_MODS_WINDOWS 0 // Running as part of MODS on Windows
#define RMCFG_FEATURE_PLATFORM_MODS_UNIX 0 // Running as part of MODS on UNIX
#define RMCFG_FEATURE_PLATFORM_VMWARE 0 // Running on VMware
#define RMCFG_FEATURE_PLATFORM_UNIX_VMWARE 0 // Running on VMware
#define RMCFG_FEATURE_ARCH_UNKNOWN 0 // unknown arch
#define RMCFG_FEATURE_ARCH_X86 0 // Intel x86, 32bit
#define RMCFG_FEATURE_ARCH_X64 0 // Intel 64bit

View File

@ -1,224 +1,3 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CCSL_H
#define CCSL_H
#include "g_ccsl_nvoc.h"
#include "nvstatus.h"
#include "nvmisc.h"
#include "kernel/gpu/conf_compute/conf_compute.h"
typedef struct ccslContext_t *pCcslContext;
/*
* Initializes a context by providing client and channel information.
*
* ccslContext [in / out]
* hClient [in]
* hChannel [in]
*/
NV_STATUS
ccslContextInitViaChannel
(
pCcslContext *ppCtx,
NvHandle hClient,
NvHandle hChannel
);
/*
* Initializes a context by providing key ID information.
*
* ConfidentialCompute [in]
* ccslContext [in / out]
* globalKeyId [in]
*/
NV_STATUS
ccslContextInitViaKeyId
(
ConfidentialCompute *pConfCompute,
pCcslContext *ppCtx,
NvU32 globalKeyId
);
/*
* Clears the context and erases sensitive material such as keys.
*
* ccslContext [in / out]
*/
void
ccslContextClear
(
pCcslContext ctx
);
/* To be called before library client triggers a Device-side encryption.
* Attempts to increment the library's Device-side message counter and returns an error if it will overflow.
*
* ccslContext [in]
* decryptIv [in]
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if the next Device-side encryption will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslLogDeviceEncryption
(
pCcslContext ctx,
NvU8 *decryptIv
);
/* Request the next IV to be used in encryption. Storing it explicitly enables the caller
* to perform encryption out of order using EncryptWithIv
*
* ccslContext [in / out]
* encryptIv [out]
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if the next encryption will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslAcquireEncryptionIv
(
pCcslContext ctx,
NvU8 *encryptIv
);
/* Rotate the IV for the given direction.
*
* ccslContext [in / out]
* direction [in]
*/
NV_STATUS
ccslRotateIv
(
pCcslContext ctx,
NvU8 direction
);
/*
* Encrypt and sign data using provided IV
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be encrypted in units of bytes.
* inputBuffer [in] - Address of plaintext input buffer. For performance it should be 16-byte aligned.
* encryptionIv [in/out] - IV to use for encryption. The IV will be "dirtied" after this operation.
* outputBuffer [in/out] - Address of ciphertext output buffer.
* authTagBuffer [in/out] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_OK.
*/
NV_STATUS
ccslEncryptWithIv
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *encryptIv,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
);
/*
* If message counter will not overflow then encrypt and sign data.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be encrypted in units of bytes.
* inputBuffer [in] - Address of plaintext input buffer. For performance it should be 16-byte aligned.
* outputBuffer [in/out] - Address of ciphertext output buffer.
* authTagBuffer [in/out] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_ERR_INSUFFICIENT_RESOURCES if message counter will overflow.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslEncrypt
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
);
/*
* First verify authentication tag. If authentication passes then the data is decrypted.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be decrypted in units of bytes.
* inputBuffer [in] - Address of ciphertext input buffer. For performance it should be 16-byte aligned.
* outputBuffer [in/out] - Address of plaintext output buffer.
* authTagBuffer [in] - Address of authentication tag. In APM it is 32 bytes. In HCC it is 16 bytes.
*
* Returns NV_ERR_INVALID_DATA if verification of the authentication tag fails.
* Returns NV_OK otherwise.
*/
NV_STATUS
ccslDecrypt
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *authTagBuffer
);
/*
* Sign the plaintext message.
*
* ccslContext [in]
* bufferSize [in] - Size of buffer to be signed in units of bytes.
* inputBuffer [in] - Address of input buffer. For performance it should be 16-byte aligned.
* authTagBuffer [in/out] - Address of authentication tag. In HCC it is 32 bytes.
*
* Returns NV_OK
*/
NV_STATUS
ccslSign
(
pCcslContext ctx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *authTagBuffer
);
#define CCSL_DIR_HOST_TO_DEVICE 0
#define CCSL_DIR_DEVICE_TO_HOST 1
/*
* Returns the number of messages that can be encrypted by the CPU (CCSL_DIR_HOST_TO_DEVICE)
* or encrypted by the GPU (CCSL_DIR_DEVICE_TO_HOST) before the message counter will overflow.
*
* ccslContext [in]
* direction [in] - Either CCSL_DIR_HOST_TO_DEVICE or CCSL_DIR_DEVICE_TO_HOST.
* messageNum [out] - Number of messages that can be encrypted before overflow.
*/
NV_STATUS
ccslQueryMessagePool
(
pCcslContext ctx,
NvU8 direction,
NvU64 *messageNum
);
#endif // CCSL_H
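The interface documented above (as it stood before this change) pairs a per-channel context with counter-mode AES-GCM helpers. The sketch below is a hypothetical caller written against only the prototypes and doc comments in this header; `ccslUsageSketch`, its parameters, and the inbound-message fields are illustrative placeholders rather than driver code, and error handling is abbreviated.

```c
#include "kernel/gpu/conf_compute/ccsl.h"

/* Hypothetical usage of the pre-change CCSL interface; not driver code. */
static NV_STATUS
ccslUsageSketch(NvHandle hClient, NvHandle hChannel,
                const NvU8 *inbound, NvU32 inboundSize,
                const NvU8 *inboundIv, const NvU8 *inboundTag)
{
    pCcslContext ctx = NULL;
    NvU8  outPlain[64]  = { 0 };
    NvU8  outCipher[64] = { 0 };
    NvU8  outTag[32]    = { 0 };   // 16 bytes in HCC, 32 bytes in APM
    NvU8  inPlain[256];
    NvU64 remaining = 0;
    NV_STATUS status;

    status = ccslContextInitViaChannel(&ctx, hClient, hChannel);
    if (status != NV_OK)
        return status;

    // How many more host-to-device messages fit before the counter overflows?
    status = ccslQueryMessagePool(ctx, CCSL_DIR_HOST_TO_DEVICE, &remaining);
    if (status != NV_OK)
        goto done;

    // Encrypt and sign an outbound (CPU-encrypted) message.
    status = ccslEncrypt(ctx, sizeof(outPlain), outPlain, outCipher, outTag);
    if (status != NV_OK)
        goto done;

    // Authenticate and decrypt an inbound (GPU-encrypted) message.
    if (inboundSize <= sizeof(inPlain))
    {
        status = ccslDecrypt(ctx, inboundSize, inbound, inboundIv,
                             inPlain, inboundTag);
    }

done:
    ccslContextClear(ctx);   // erases keys and other sensitive material
    return status;
}
```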

View File

@ -42,6 +42,19 @@
#include "platform/chipset/chipset.h" // BUSINFO
#include "gpu/nvbitmask.h" // NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX
// VF related info for GSP-RM
typedef struct GSP_VF_INFO
{
NvU32 totalVFs;
NvU32 firstVFOffset;
NvU64 FirstVFBar0Address;
NvU64 FirstVFBar1Address;
NvU64 FirstVFBar2Address;
NvBool b64bitBar0;
NvBool b64bitBar1;
NvBool b64bitBar2;
} GSP_VF_INFO;
typedef struct GspSMInfo_t
{
NvU32 version;
@ -163,6 +176,7 @@ typedef struct GspSystemInfo
NvU32 hypervisorType;
NvBool bIsPassthru;
NvU64 sysTimerOffsetNs;
GSP_VF_INFO gspVFInfo;
} GspSystemInfo;

View File

@ -57,14 +57,10 @@ typedef struct
NVOC_PREFIX(ceutils) class CeUtils : Object
{
public:
NV_STATUS ceutilsConstruct(CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
NV_STATUS ceutilsConstruct(CeUtils *pCeUtils, OBJGPU *pGpu, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance,
NV0050_ALLOCATION_PARAMETERS *pAllocParams);
void ceutilsDestruct(CeUtils *pCeUtils);
NV_STATUS ceutilsInitialize(CeUtils *pCeUtils, OBJGPU *pGpu, NV0050_ALLOCATION_PARAMETERS *pAllocParams);
void ceutilsDeinit(CeUtils *pCeUtils);
void ceutilsRegisterGPUInstance(CeUtils *pCeUtils, KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance);
NV_STATUS ceutilsMemset(CeUtils *pCeUtils, CEUTILS_MEMSET_PARAMS *pParams);
NV_STATUS ceutilsMemcopy(CeUtils *pCeUtils, CEUTILS_MEMCOPY_PARAMS *pParams);
@ -80,7 +76,6 @@ public:
NvHandle hSubdevice;
OBJCHANNEL *pChannel;
KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
OBJGPU *pGpu;
KernelCE *pKCe;

View File

@ -107,17 +107,8 @@
NV_PUSH_DATA(d4); \
} while (0)
#define READ_CHANNEL_PAYLOAD_SEMA(channel) MEM_RD32((NvU8*)channel->pbCpuVA + \
channel->finishPayloadOffset)
#define READ_CHANNEL_PB_SEMA(channel) MEM_RD32((NvU8*)channel->pbCpuVA + \
channel->semaOffset)
#define WRITE_CHANNEL_PB_SEMA(channel, val) MEM_WR32((NvU8*)channel->pbCpuVA + \
channel->semaOffset, val);
#define WRITE_CHANNEL_PAYLOAD_SEMA(channel,val) MEM_WR32((NvU8*)channel->pbCpuVA + \
channel->finishPayloadOffset, val);
#define READ_CHANNEL_PAYLOAD_SEMA(channel) channelReadChannelMemdesc(channel, channel->finishPayloadOffset)
#define READ_CHANNEL_PB_SEMA(channel) channelReadChannelMemdesc(channel, channel->semaOffset)
//
// This struct contains parameters needed to send a pushbuffer for a CE
@ -141,6 +132,7 @@ typedef struct
NV_STATUS channelSetupIDs(OBJCHANNEL *pChannel, OBJGPU *pGpu, NvBool bUseVasForCeCopy, NvBool bMIGInUse);
void channelSetupChannelBufferSizes(OBJCHANNEL *pChannel);
NvU32 channelReadChannelMemdesc(OBJCHANNEL *pChannel, NvU32 offset);
// Needed for pushbuffer management
NV_STATUS channelWaitForFreeEntry(OBJCHANNEL *pChannel, NvU32 *pPutIndex);

View File

@ -87,7 +87,7 @@ typedef struct OBJMEMSCRUB {
PSCRUB_NODE pScrubList;
#if !defined(SRT_BUILD)
// Scrubber uses ceUtils to manage CE channel
CeUtils ceUtilsObject;
CeUtils *pCeUtils;
#endif
struct OBJGPU *pGpu;
VGPU_GUEST_PMA_SCRUB_BUFFER_RING vgpuScrubBuffRing;

View File

@ -46,7 +46,7 @@
__spdmStatus = (expr); \
if (LIBSPDM_STATUS_IS_ERROR(__spdmStatus)) \
{ \
NV_PRINTF(LEVEL_INFO, "SPDM failed with status 0x%0x\n", \
NV_PRINTF(LEVEL_ERROR, "SPDM failed with status 0x%0x\n", \
__spdmStatus); \
status = NV_ERR_GENERIC; \
goto ErrorExit; \

View File

@ -284,10 +284,6 @@ NV_STATUS nvGpuOpsFlushReplayableFaultBuffer(struct gpuDevice *device);
NV_STATUS nvGpuOpsCcslContextInit(struct ccslContext_t **ctx,
gpuChannelHandle channel);
NV_STATUS nvGpuOpsCcslContextClear(struct ccslContext_t *ctx);
NV_STATUS nvGpuOpsCcslLogDeviceEncryption(struct ccslContext_t *ctx,
NvU8 *decryptIv);
NV_STATUS nvGpuOpsCcslAcquireEncryptionIv(struct ccslContext_t *ctx,
NvU8 *encryptIv);
NV_STATUS nvGpuOpsCcslRotateIv(struct ccslContext_t *ctx,
NvU8 direction);
NV_STATUS nvGpuOpsCcslEncrypt(struct ccslContext_t *ctx,
@ -306,6 +302,8 @@ NV_STATUS nvGpuOpsCcslDecrypt(struct ccslContext_t *ctx,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
NvU8 const *authTagBuffer);
NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NvU32 bufferSize,
@ -314,5 +312,9 @@ NV_STATUS nvGpuOpsCcslSign(struct ccslContext_t *ctx,
NV_STATUS nvGpuOpsQueryMessagePool(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 *messageNum);
NV_STATUS nvGpuOpsIncrementIv(struct ccslContext_t *ctx,
NvU8 direction,
NvU64 increment,
NvU8 *iv);
#endif /* _NV_GPU_OPS_H_*/

View File

@ -286,6 +286,7 @@ typedef struct UvmGpuChannelInfo_tag
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
@ -1031,10 +1032,10 @@ typedef struct UvmCslIv
NvU8 fresh;
} UvmCslIv;
typedef enum UvmCslDirection
typedef enum UvmCslOperation
{
UVM_CSL_DIR_CPU_TO_GPU,
UVM_CSL_DIR_GPU_TO_CPU
} UvmCslDirection;
UVM_CSL_OPERATION_ENCRYPT,
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;
#endif // _NV_UVM_TYPES_H_

View File

@ -1915,4 +1915,15 @@
#define NV_REG_STR_RM_GSP_OWNED_FAULT_BUFFERS_ENABLE_NO 0x00000000
#define NV_REG_STR_RM_GSP_OWNED_FAULT_BUFFERS_ENABLE_YES 0x00000001
//
// WAR for BlueField3: Bug 4040336
// BF3's PCI MMIO bus address 0x800000000000 is too high for Ampere to address.
// Because of this, BF3's bus address has been moved below 4 GB, so the CPU PA is no
// longer the same as the bus address; this regkey adjusts the CPU PA passed in to the
// correct bus address.
//
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3 "RmDmaAdjustPeerMmioBF3"
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_DISABLE 0
#define NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_ENABLE 1
#endif // NVRM_REGISTRY_H

View File

@ -106,6 +106,7 @@ typedef struct vmiopd_SM_info {
#define NV2080_CTRL_NVLINK_MAX_LINKS_v15_02 6
#define NV2080_CTRL_NVLINK_MAX_LINKS_v1A_18 12
#define NV2080_CTRL_NVLINK_MAX_LINKS_v23_04 24
#define NV0000_CTRL_P2P_CAPS_INDEX_TABLE_SIZE_v15_02 8
#define NV0000_CTRL_P2P_CAPS_INDEX_TABLE_SIZE_v1F_0D 9

View File

@ -31,7 +31,7 @@
#define RPC_VERSION_FROM_VGX_VERSION(major, minor) ( DRF_NUM(_RPC, _VERSION_NUMBER, _MAJOR, major) | \
DRF_NUM(_RPC, _VERSION_NUMBER, _MINOR, minor))
#define VGX_MAJOR_VERSION_NUMBER 0x23
#define VGX_MINOR_VERSION_NUMBER 0x03
#define VGX_MINOR_VERSION_NUMBER 0x05
#define VGX_MAJOR_VERSION_NUMBER_VGPU_12_0 0x1A
#define VGX_MINOR_VERSION_NUMBER_VGPU_12_0 0x18
@ -49,7 +49,7 @@
* 2. This is the first break in migration compatibility after a release.
*/
#define NV_VGPU_GRIDSW_INTERNAL_TO_EXTERNAL_VERSION_MAPPING \
{{0x23, 0x0}, {0x23, 0x03}, {0x12, 0x1}}, \
{{0x23, 0x0}, {0x23, 0x05}, {0x12, 0x1}}, \
{{0x22, 0x0}, {0x22, 0x02}, {0x11, 0x1}}, \
{{0x21, 0x0}, {0x21, 0x0C}, {0x10, 0x1}}, \
{{0x20, 0x0}, {0x20, 0x04}, {0xF, 0x1}}, \

View File

@ -29,7 +29,7 @@
//******************************************************************************
// FIXME XXX
#define NVOC_KERNEL_GRAPHICS_CONTEXT_H_PRIVATE_ACCESS_ALLOWED
#define NVOC_KERNEL_GRAPHICS_CONTEXT_H_PRIVATE_ACCESS_ALLOWED
#include "os/os.h"
#include "core/system.h"
@ -677,17 +677,17 @@ NV_STATUS RmRpcSetGuestSystemInfo(OBJGPU *pGpu, OBJRPC *pRpc)
{
if (rpcVgxVersion.majorNum != 0)
{
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) && !bSkipRpcVersionHandshake)
{
bSkipRpcVersionHandshake = NV_TRUE;
}
else
{
NV_PRINTF(LEVEL_INFO,
"NVRM_RPC: Skipping RPC version handshake for instance 0x%x\n",
gpuGetInstance(pGpu));
goto skip_ver_handshake;
}
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_RESUME_CODEPATH) && !bSkipRpcVersionHandshake)
{
bSkipRpcVersionHandshake = NV_TRUE;
}
else
{
NV_PRINTF(LEVEL_INFO,
"NVRM_RPC: Skipping RPC version handshake for instance 0x%x\n",
gpuGetInstance(pGpu));
goto skip_ver_handshake;
}
}
else
{
@ -1325,6 +1325,16 @@ NV_STATUS rpcGspSetSystemInfo_v17_00
rpcInfo->hypervisorType = hypervisorGetHypervisorType(pHypervisor);
rpcInfo->bIsPassthru = pGpu->bIsPassthru;
// Fill in VF related GPU flags
rpcInfo->gspVFInfo.totalVFs = pGpu->sriovState.totalVFs;
rpcInfo->gspVFInfo.firstVFOffset = pGpu->sriovState.firstVFOffset;
rpcInfo->gspVFInfo.FirstVFBar0Address = pGpu->sriovState.firstVFBarAddress[0];
rpcInfo->gspVFInfo.FirstVFBar1Address = pGpu->sriovState.firstVFBarAddress[1];
rpcInfo->gspVFInfo.FirstVFBar2Address = pGpu->sriovState.firstVFBarAddress[2];
rpcInfo->gspVFInfo.b64bitBar0 = pGpu->sriovState.b64bitVFBar0;
rpcInfo->gspVFInfo.b64bitBar1 = pGpu->sriovState.b64bitVFBar1;
rpcInfo->gspVFInfo.b64bitBar2 = pGpu->sriovState.b64bitVFBar2;
OBJTMR *pTmr = GPU_GET_TIMER(pGpu);
rpcInfo->sysTimerOffsetNs = pTmr->sysTimerOffsetNs;

View File

@ -81,6 +81,8 @@ NVLOG_LOGGER NvLogLogger =
NV_STATUS
nvlogInit(void *pData)
{
NV_STATUS status = NV_OK;
nvlogRegRoot = pData;
portInitialize();
NvLogLogger.mainLock = portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged());
@ -88,8 +90,13 @@ nvlogInit(void *pData)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
NvLogLogger.buffersLock = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged());
if (NvLogLogger.buffersLock == NULL)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
tlsInitialize();
return NV_OK;
return status;
}
void nvlogUpdate(void) {
@ -98,22 +105,30 @@ void nvlogUpdate(void) {
NV_STATUS
nvlogDestroy(void)
{
NV_STATUS status = NV_OK;
NvU32 i;
tlsShutdown();
for (i = 0; i < NVLOG_MAX_BUFFERS; i++)
{
nvlogDeallocBuffer(i, NV_TRUE);
}
if (NvLogLogger.mainLock != NULL)
{
portSyncSpinlockDestroy(NvLogLogger.mainLock);
NvLogLogger.mainLock = NULL;
}
if (NvLogLogger.buffersLock != NULL)
{
portSyncMutexDestroy(NvLogLogger.buffersLock);
NvLogLogger.buffersLock = NULL;
}
tlsShutdown();
/// @todo Destructor should return void.
portShutdown();
return NV_OK;
return status;
}
static NV_STATUS
@ -228,6 +243,7 @@ nvlogAllocBuffer
return status;
}
portSyncMutexAcquire(NvLogLogger.buffersLock);
portSyncSpinlockAcquire(NvLogLogger.mainLock);
if (NvLogLogger.nextFree < NVLOG_MAX_BUFFERS)
@ -249,6 +265,7 @@ nvlogAllocBuffer
else break;
}
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);
if (status != NV_OK)
{
@ -282,11 +299,13 @@ nvlogDeallocBuffer
_YES, pBuffer->flags);
while (pBuffer->threadCount > 0) { /*spin*/ }
portSyncMutexAcquire(NvLogLogger.buffersLock);
portSyncSpinlockAcquire(NvLogLogger.mainLock);
NvLogLogger.pBuffers[hBuffer] = NULL;
NvLogLogger.nextFree = NV_MIN(hBuffer, NvLogLogger.nextFree);
NvLogLogger.totalFree++;
portSyncSpinlockRelease(NvLogLogger.mainLock);
portSyncMutexRelease(NvLogLogger.buffersLock);
_deallocateNvlogBuffer(pBuffer);
}

View File

@ -732,3 +732,141 @@ kbifGetBusOptionsAddr_GH100
return status;
}
/*!
* @brief: Get BAR information from PCIe config space
*
* @param[in] pGpu OBJGPU pointer
* @param[in] barRegCSBase The base register 0 address
* @param[in] barIndex The BAR index to check
* @param[out] pBarBaseAddress The start address of the specified BAR
* @param[out] pIs64BitBar To indicate if the BAR is using 64bit address
*
* @returns NV_STATUS
*/
static NV_STATUS
_kbifGetBarInfo_GH100
(
OBJGPU *pGpu,
NvU32 barRegCSBase,
NvU32 barIndex,
NvU64 *pBarBaseAddress,
NvBool *pIs64BitBar
)
{
NV_STATUS status = NV_OK;
NvBool barIs64Bit = NV_FALSE;
NvU32 barAddrLow = 0;
NvU32 barAddrHigh = 0;
NvU32 barRegCSLimit = barRegCSBase + NV_EP_PCFG_GPU_BARREG5 - NV_EP_PCFG_GPU_BARREG0;
NvU32 barRegCSOffset = barRegCSBase;
NvU64 barBaseAddr = 0;
NvU32 i = 0;
for (i = 0; i <= barIndex; i++)
{
if ((status = GPU_BUS_CFG_CYCLE_RD32(pGpu, barRegCSOffset, &barAddrLow)) != NV_OK)
{
return status;
}
//
// The SPACE_TYPE, ADDRESS_TYPE, PREFETCHABLE and BASE_ADDRESS fields
// have the same definition as for Base Address Register 0
//
barIs64Bit = FLD_TEST_DRF(_EP_PCFG_GPU, _BARREG0, _REG_ADDR_TYPE, _64BIT, barAddrLow);
if (i != barIndex)
{
barRegCSOffset += (barIs64Bit ? 8 : 4);
if (barRegCSOffset >= barRegCSLimit)
{
return NV_ERR_INVALID_INDEX;
}
}
}
if (pBarBaseAddress != NULL)
{
// Get the BAR address
barBaseAddr = barAddrLow & 0xFFFFFFF0;
if (barIs64Bit)
{
// Read and save the bar high address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, barRegCSOffset + 4, &barAddrHigh);
NV_ASSERT_OR_RETURN((status == NV_OK), status);
barBaseAddr |= (NvU64)barAddrHigh << 32;
}
*pBarBaseAddress = barBaseAddr;
}
if (pIs64BitBar != NULL)
{
*pIs64BitBar = barIs64Bit;
}
return NV_OK;
}
/*! @brief Fetch VF details such as the number of VFs, first VF offset, etc.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelBif Kernel BIF object pointer
*/
void
kbifCacheVFInfo_GH100
(
OBJGPU *pGpu,
KernelBif *pKernelBif
)
{
NV_STATUS status = NV_OK;
NvU32 regVal = 0;
NvU64 barAddr = 0;
NvBool barIs64Bit = NV_FALSE;
// Get total VF count
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF, &regVal);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_SRIOV_INIT_TOT_VF\n");
return;
}
pGpu->sriovState.totalVFs = GPU_DRF_VAL(_EP_PCFG_GPU, _SRIOV_INIT_TOT_VF,
_TOTAL_VFS, regVal);
// Get first VF offset
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE, &regVal);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_SRIOV_FIRST_VF_STRIDE\n");
return;
}
pGpu->sriovState.firstVFOffset = GPU_DRF_VAL(_EP_PCFG_GPU, _SRIOV_FIRST_VF_STRIDE,
_FIRST_VF_OFFSET, regVal);
// Get VF BAR0 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 0, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[0] = barAddr;
pGpu->sriovState.b64bitVFBar0 = barIs64Bit;
// Get VF BAR1 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 1, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[1] = barAddr;
pGpu->sriovState.b64bitVFBar1 = barIs64Bit;
// Get VF BAR2 info
status = _kbifGetBarInfo_GH100(pGpu, NV_EP_PCFG_GPU_VF_BAR0, 2, &barAddr, &barIs64Bit);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[2] = barAddr;
pGpu->sriovState.b64bitVFBar2 = barIs64Bit;
}
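For reference, the address assembly that `_kbifGetBarInfo_GH100` performs on the two config-space dwords can be reproduced in isolation. The snippet below is an illustrative, standalone sketch using plain C types and hypothetical register values, not driver code: bits [3:0] of the low dword carry the space/type/prefetch flags and are masked off, and for a 64-bit BAR the following dword supplies address bits 63:32.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Assemble a BAR base address the way _kbifGetBarInfo_GH100 does above. */
static uint64_t assembleBarBase(uint32_t barAddrLow, uint32_t barAddrHigh, bool is64Bit)
{
    uint64_t base = barAddrLow & 0xFFFFFFF0u;   /* drop type/prefetch flag bits */

    if (is64Bit)
        base |= (uint64_t)barAddrHigh << 32;    /* upper dword of a 64-bit BAR  */

    return base;
}

int main(void)
{
    /* Hypothetical 64-bit, non-prefetchable BAR: low-dword flag bits = 0x4. */
    assert(assembleBarBase(0xE0000004u, 0x00000040u, true) == 0x40E0000000ull);

    /* A 32-bit BAR ignores the high dword entirely. */
    assert(assembleBarBase(0xFD000000u, 0xDEADBEEFu, false) == 0xFD000000ull);
    return 0;
}
```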

View File

@ -24,6 +24,7 @@
/* ------------------------- System Includes -------------------------------- */
#include "gpu/bif/kernel_bif.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/gpu.h"
#define NV_VGPU_EMU 0x0000FFFF:0x0000F000 /* RW--D */
@ -199,3 +200,56 @@ kbifGetVFSparseMmapRegions_TU102
*pNumAreas = idx;
return NV_OK;
}
/*! @brief Fetch VF details such as the number of VFs, first VF offset, etc.
*
* @param[in] pGpu GPU object pointer
* @param[in] pKernelBif Kernel BIF object pointer
*/
void
kbifCacheVFInfo_TU102
(
OBJGPU *pGpu,
KernelBif *pKernelBif
)
{
NV_STATUS status = NV_OK;
NvU32 regVal = 0;
NvU32 saveLo = 0;
NvU32 saveHi = 0;
// Get total VF count
GPU_BUS_CFG_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR3, &regVal);
pGpu->sriovState.totalVFs = GPU_DRF_VAL(_XVE, _SRIOV_CAP_HDR3,
_TOTAL_VFS, regVal);
// Get first VF offset
GPU_BUS_CFG_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR5, &regVal);
pGpu->sriovState.firstVFOffset = GPU_DRF_VAL(_XVE, _SRIOV_CAP_HDR5,
_FIRST_VF_OFFSET, regVal);
// Get VF BAR0 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR9, &saveLo);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[0] = saveLo & 0xFFFFFFF0;
// Get VF BAR1 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR10, &saveLo);
NV_ASSERT(status == NV_OK);
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR11_VF_BAR1_HI, &saveHi);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[1] = (((NvU64)saveHi) << 32) + (saveLo & 0xFFFFFFF0);
// Get VF BAR2 first address
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR12, &saveLo);
NV_ASSERT(status == NV_OK);
status = GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_XVE_SRIOV_CAP_HDR13_VF_BAR2_HI, &saveHi);
NV_ASSERT(status == NV_OK);
pGpu->sriovState.firstVFBarAddress[2] = (((NvU64)saveHi) << 32) + (saveLo & 0xFFFFFFF0);
// Get if VF BARs are 64 bit addressable
regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_SRIOV_CAP_HDR10);
pGpu->sriovState.b64bitVFBar1 = IS_BAR_64(regVal);
regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_SRIOV_CAP_HDR12);
pGpu->sriovState.b64bitVFBar2 = IS_BAR_64(regVal);
}

View File

@ -76,6 +76,9 @@ kbifConstructEngine_IMPL
// Cache MNOC interface support
kbifIsMnocSupported_HAL(pGpu, pKernelBif);
// Cache VF info
kbifCacheVFInfo_HAL(pGpu, pKernelBif);
// Used to track when the link has gone into Recovery, which can cause CEs.
pKernelBif->EnteredRecoverySinceErrorsLastChecked = NV_FALSE;

View File

@ -21,6 +21,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#define NVOC_CCSL_H_PRIVATE_ACCESS_ALLOWED
#include "core/prelude.h"
#include "rmconfig.h"
#include "kernel/gpu/conf_compute/ccsl.h"
@ -34,33 +36,8 @@
#include <hal/library/cryptlib.h>
#include "cc_drv.h"
struct ccslContext_t
{
NvHandle hClient;
NvHandle hChannel;
enum {CSL_MSG_CTR_32, CSL_MSG_CTR_64} msgCounterSize;
NvU8 keyIn[CC_AES_256_GCM_KEY_SIZE_BYTES];
union
{
struct
{
NvU8 ivIn[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskIn[CC_AES_256_GCM_IV_SIZE_BYTES];
};
NvU8 nonce[CC_HMAC_NONCE_SIZE_BYTES];
};
NvU8 keyOut[CC_AES_256_GCM_KEY_SIZE_BYTES];
NvU8 ivOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU8 ivMaskOut[CC_AES_256_GCM_IV_SIZE_BYTES];
NvU64 keyHandleIn;
NvU64 keyHandleOut;
};
static void ccslSplit32(NvU8 *dst, NvU32 num)
static void
ccslSplit32(NvU8 *dst, NvU32 num)
{
dst[3] = (NvU8) (num >> 24);
dst[2] = (NvU8) (num >> 16);
@ -68,7 +45,8 @@ static void ccslSplit32(NvU8 *dst, NvU32 num)
dst[0] = (NvU8) (num);
}
static void ccslSplit64(NvU8 *dst, NvU64 num)
static void
ccslSplit64(NvU8 *dst, NvU64 num)
{
dst[7] = (NvU8) (num >> 56);
dst[6] = (NvU8) (num >> 48);
@ -80,41 +58,8 @@ static void ccslSplit64(NvU8 *dst, NvU64 num)
dst[0] = (NvU8) (num);
}
static NV_STATUS incrementCounter(pCcslContext pCtx, NvU8 *ctr)
{
NvU32 msgCounterLo = NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
if (msgCounterLo == NV_U32_MAX)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
++msgCounterLo;
ccslSplit32(ctr, msgCounterLo);
break;
case CSL_MSG_CTR_64:
{
NvU32 msgCounterhi = NvU32_BUILD(ctr[7], ctr[6], ctr[5], ctr[4]);
NvU64 msgCounter = ((NvU64) msgCounterhi << 32) | msgCounterLo;
if (msgCounter == NV_U64_MAX)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
++msgCounter;
ccslSplit64(ctr, msgCounter);
break;
}
}
return NV_OK;
}
static void writeKmbToContext
static void
writeKmbToContext
(
pCcslContext pCtx,
CC_KMB *kmb
@ -159,7 +104,52 @@ static void writeKmbToContext
}
NV_STATUS
ccslContextInitViaChannel
ccslIncrementCounter_IMPL
(
pCcslContext pCtx,
NvU8 *ctr,
NvU64 increment
)
{
NvU32 msgCounterLo = NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
if (increment > NV_U32_MAX)
{
return NV_ERR_INVALID_ARGUMENT;
}
if (msgCounterLo > (NV_U32_MAX - increment))
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
msgCounterLo += increment;
ccslSplit32(ctr, msgCounterLo);
break;
case CSL_MSG_CTR_64:
{
NvU32 msgCounterHi = NvU32_BUILD(ctr[7], ctr[6], ctr[5], ctr[4]);
NvU64 msgCounter = ((NvU64) msgCounterHi << 32) | msgCounterLo;
if (msgCounterLo > (NV_U64_MAX - increment))
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
msgCounter += increment;
ccslSplit64(ctr, msgCounter);
break;
}
}
return NV_OK;
}
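To make the byte order in `ccslIncrementCounter_IMPL` concrete: the message counter sits little-endian in the leading IV bytes, so `NvU32_BUILD(ctr[3], ctr[2], ctr[1], ctr[0])` reassembles the 32-bit value before the overflow check. The standalone snippet below (plain C, purely illustrative values) reproduces that check for the `CSL_MSG_CTR_32` case.

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Counter bytes as they sit at the front of the IV (little-endian). */
    uint8_t  ctr[4]  = { 0xFE, 0xFF, 0xFF, 0xFF };              /* 0xFFFFFFFE */
    uint32_t counter = ((uint32_t)ctr[3] << 24) | ((uint32_t)ctr[2] << 16) |
                       ((uint32_t)ctr[1] << 8)  |  (uint32_t)ctr[0];
    uint64_t increment;

    assert(counter == 0xFFFFFFFEu);

    /* Same test as "msgCounterLo > (NV_U32_MAX - increment)". */
    increment = 1;   /* 0xFFFFFFFE + 1 == NV_U32_MAX: still allowed           */
    assert(!(counter > UINT32_MAX - increment));

    increment = 2;   /* would wrap past NV_U32_MAX: the call must fail        */
    assert(counter > UINT32_MAX - increment);
    return 0;
}
```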
NV_STATUS
ccslContextInitViaChannel_IMPL
(
pCcslContext *ppCtx,
NvHandle hClient,
@ -228,11 +218,19 @@ ccslContextInitViaChannel
writeKmbToContext(pCtx, kmb);
}
nvDbgDumpBufferBytes(kmb->encryptBundle.iv, sizeof(kmb->encryptBundle.iv));
nvDbgDumpBufferBytes(kmb->encryptBundle.ivMask, sizeof(kmb->encryptBundle.ivMask));
nvDbgDumpBufferBytes(kmb->encryptBundle.key, sizeof(kmb->encryptBundle.key));
nvDbgDumpBufferBytes(kmb->decryptBundle.iv, sizeof(kmb->decryptBundle.iv));
nvDbgDumpBufferBytes(kmb->decryptBundle.ivMask, sizeof(kmb->decryptBundle.ivMask));
nvDbgDumpBufferBytes(kmb->decryptBundle.key, sizeof(kmb->decryptBundle.key));
return NV_OK;
}
NV_STATUS
ccslContextInitViaKeyId
ccslContextInitViaKeyId_KERNEL
(
ConfidentialCompute *pConfCompute,
pCcslContext *ppCtx,
@ -282,7 +280,7 @@ ccslContextInitViaKeyId
}
void
ccslContextClear
ccslContextClear_IMPL
(
pCcslContext pCtx
)
@ -298,52 +296,7 @@ ccslContextClear
}
NV_STATUS
ccslLogDeviceEncryption
(
pCcslContext pCtx,
NvU8 *decryptIv
)
{
NV_STATUS status;
status = incrementCounter(pCtx, pCtx->ivIn);
if (status != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
portMemCopy(decryptIv, CC_AES_256_GCM_IV_SIZE_BYTES, pCtx->ivIn, CC_AES_256_GCM_IV_SIZE_BYTES);
return NV_OK;
}
NV_STATUS
ccslAcquireEncryptionIv
(
pCcslContext pCtx,
NvU8 *encryptIv
)
{
NV_STATUS status;
status = incrementCounter(pCtx, pCtx->ivOut);
if (status != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
portMemCopy(encryptIv, CC_AES_256_GCM_IV_SIZE_BYTES, pCtx->ivOut, CC_AES_256_GCM_IV_SIZE_BYTES);
// The "freshness" bit is right after the IV.
encryptIv[CC_AES_256_GCM_IV_SIZE_BYTES] = 1;
return NV_OK;
}
NV_STATUS
ccslRotateIv
ccslRotateIv_IMPL
(
pCcslContext pCtx,
NvU8 direction
@ -422,12 +375,14 @@ ccslRotateIv
}
NV_STATUS
ccslEncryptWithIv
ccslEncryptWithIv_IMPL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 *encryptIv,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
)
@ -449,10 +404,10 @@ ccslEncryptWithIv
}
if(!libspdm_aead_aes_gcm_encrypt(
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_GENERIC;
}
@ -461,11 +416,13 @@ ccslEncryptWithIv
}
NV_STATUS
ccslEncrypt
ccslEncrypt_KERNEL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 *authTagBuffer
)
@ -473,7 +430,7 @@ ccslEncrypt
NvU8 iv[CC_AES_256_GCM_IV_SIZE_BYTES] = {0};
size_t outputBufferSize = bufferSize;
if (incrementCounter(pCtx, pCtx->ivOut) != NV_OK)
if (ccslIncrementCounter(pCtx, pCtx->ivOut, 1) != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
@ -484,10 +441,10 @@ ccslEncrypt
}
if(!libspdm_aead_aes_gcm_encrypt(
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyOut, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_GENERIC;
}
@ -496,12 +453,14 @@ ccslEncrypt
}
NV_STATUS
ccslDecrypt
ccslDecrypt_KERNEL
(
pCcslContext pCtx,
NvU32 bufferSize,
NvU8 const *inputBuffer,
NvU8 const *decryptIv,
NvU8 const *aadBuffer,
NvU32 aadSize,
NvU8 *outputBuffer,
NvU8 const *authTagBuffer
)
@ -509,9 +468,14 @@ ccslDecrypt
NvU8 iv[CC_AES_256_GCM_IV_SIZE_BYTES] = {0};
size_t outputBufferSize = bufferSize;
if ((bufferSize == 0) || ((aadBuffer != NULL) && (aadSize == 0)))
{
return NV_ERR_INVALID_ARGUMENT;
}
if (decryptIv == NULL)
{
if (incrementCounter(pCtx, pCtx->ivIn) != NV_OK)
if (ccslIncrementCounter(pCtx, pCtx->ivIn, 1) != NV_OK)
{
return NV_ERR_INSUFFICIENT_RESOURCES;
}
@ -530,10 +494,10 @@ ccslDecrypt
}
if(!libspdm_aead_aes_gcm_decrypt(
(NvU8 *)pCtx->keyIn, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, NULL, 0,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
(NvU8 *)pCtx->keyIn, CC_AES_256_GCM_KEY_SIZE_BYTES,
iv, CC_AES_256_GCM_IV_SIZE_BYTES, aadBuffer, aadSize,
inputBuffer, bufferSize, authTagBuffer, 16,
outputBuffer, &outputBufferSize))
{
return NV_ERR_INVALID_DATA;
}
@ -570,7 +534,7 @@ static NV_STATUS incrementCounter192(NvU8 *ctr)
}
NV_STATUS
ccslSign
ccslSign_IMPL
(
pCcslContext pCtx,
NvU32 bufferSize,
@ -625,11 +589,17 @@ ccslSign
return NV_OK;
}
static NvU64 getMessageCounterAndLimit (pCcslContext pCtx, NvU8 *iv, NvU64 *limit)
static NvU64
getMessageCounterAndLimit
(
pCcslContext pCtx,
NvU8 *iv,
NvU64 *limit
)
{
NvU32 msgCounterLo = NvU32_BUILD(iv[3], iv[2], iv[1], iv[0]);
NvU32 msgCounterHi = NvU32_BUILD(iv[7], iv[6], iv[5], iv[4]);
NvU32 msgCounterHi = NvU32_BUILD(iv[7], iv[6], iv[5], iv[4]);
switch (pCtx->msgCounterSize)
{
case CSL_MSG_CTR_32:
@ -644,7 +614,7 @@ static NvU64 getMessageCounterAndLimit (pCcslContext pCtx, NvU8 *iv, NvU64 *limi
}
NV_STATUS
ccslQueryMessagePool
ccslQueryMessagePool_IMPL
(
pCcslContext pCtx,
NvU8 direction,
@ -670,3 +640,52 @@ ccslQueryMessagePool
return NV_OK;
}
NV_STATUS
ccslIncrementIv_IMPL
(
pCcslContext pCtx,
NvU8 direction,
NvU64 increment,
NvU8 *iv
)
{
NV_STATUS status;
void *ivPtr;
switch (direction)
{
case CCSL_DIR_HOST_TO_DEVICE:
ivPtr = pCtx->ivOut;
break;
case CCSL_DIR_DEVICE_TO_HOST:
ivPtr = pCtx->ivIn;
break;
default:
return NV_ERR_INVALID_ARGUMENT;
}
status = ccslIncrementCounter(pCtx, ivPtr, increment);
if (status != NV_OK)
{
return status;
}
if (iv != NULL) {
portMemCopy(iv, CC_AES_256_GCM_IV_SIZE_BYTES, ivPtr, CC_AES_256_GCM_IV_SIZE_BYTES);
if (direction == CCSL_DIR_HOST_TO_DEVICE)
{
// The "freshness" bit is right after the IV.
iv[CC_AES_256_GCM_IV_SIZE_BYTES] = 1;
}
else
{
// Decrypt IV cannot be used for encryption.
iv[CC_AES_256_GCM_IV_SIZE_BYTES] = 0;
}
}
return NV_OK;
}
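A hypothetical caller-side sketch for the new increment entry point: advancing the encrypt-side counter for a batch of messages up front so they can later be encrypted out of order with `ccslEncryptWithIv`. The un-suffixed wrapper name `ccslIncrementIv` and the batching use case are assumptions (the NVOC-generated header is not shown in this diff); only the `_IMPL` signature, the `CCSL_DIR_HOST_TO_DEVICE` direction, and the extra "freshness" byte written past `CC_AES_256_GCM_IV_SIZE_BYTES` are taken from the code above, with those symbols assumed visible via the same headers ccsl.c includes.

```c
#include "kernel/gpu/conf_compute/ccsl.h"

/*
 * Hypothetical helper, not driver code: advance the host-to-device counter by
 * 'count' in one call and copy out the resulting IV. The buffer must have
 * room for the IV plus the trailing "freshness" byte the call sets.
 */
static NV_STATUS
reserveEncryptIvs
(
    pCcslContext pCtx,
    NvU64        count,
    NvU8         lastIv[CC_AES_256_GCM_IV_SIZE_BYTES + 1]
)
{
    return ccslIncrementIv(pCtx, CCSL_DIR_HOST_TO_DEVICE, count, lastIv);
}
```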

View File

@ -203,6 +203,15 @@ kchannelCreateUserdMemDesc_GV100
AT_GPU,
userdOffset);
// Adjust for the DMA window start address, if any
if (memdescGetAddressSpace(pUserdMemDescForSubDev) == ADDR_SYSMEM)
{
RmPhysAddr dmaWindowStart = gpuGetDmaStartAddress(pGpu);
NV_ASSERT_OR_RETURN(userdAddr > dmaWindowStart, NV_ERR_INVALID_ADDRESS);
userdAddr -= dmaWindowStart;
}
userdAddrLo = NvU64_LO32(userdAddr) >> userdShift;
userdAddrHi = NvU64_HI32(userdAddr);

View File

@ -65,26 +65,18 @@ gpuInitSriov_FWCLIENT
GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);
NvU32 totalPcieFns = 0;
pGpu->sriovState.totalVFs = pGSCI->sriovCaps.totalVFs;
pGpu->sriovState.firstVFOffset = pGSCI->sriovCaps.firstVfOffset;
pGpu->sriovState.firstVFBarAddress[0] = pGSCI->sriovCaps.FirstVFBar0Address;
pGpu->sriovState.firstVFBarAddress[1] = pGSCI->sriovCaps.FirstVFBar1Address;
pGpu->sriovState.firstVFBarAddress[2] = pGSCI->sriovCaps.FirstVFBar2Address;
pGpu->sriovState.vfBarSize[0] = pGSCI->sriovCaps.bar0Size;
pGpu->sriovState.vfBarSize[1] = pGSCI->sriovCaps.bar1Size;
pGpu->sriovState.vfBarSize[2] = pGSCI->sriovCaps.bar2Size;
pGpu->sriovState.b64bitVFBar0 = pGSCI->sriovCaps.b64bitBar0;
pGpu->sriovState.b64bitVFBar1 = pGSCI->sriovCaps.b64bitBar1;
pGpu->sriovState.b64bitVFBar2 = pGSCI->sriovCaps.b64bitBar2;
pGpu->sriovState.vfBarSize[0] = pGSCI->sriovCaps.bar0Size;
pGpu->sriovState.vfBarSize[1] = pGSCI->sriovCaps.bar1Size;
pGpu->sriovState.vfBarSize[2] = pGSCI->sriovCaps.bar2Size;
pGpu->sriovState.maxGfid = pGSCI->sriovMaxGfid;
pGpu->sriovState.maxGfid = pGSCI->sriovMaxGfid;
// note: pGpu->sriovState.virtualRegPhysOffset is initialized separately
// owned by physical RM, so leave uninitialized
pGpu->sriovState.pP2PInfo = NULL;
pGpu->sriovState.bP2PAllocated = NV_FALSE;
pGpu->sriovState.maxP2pGfid = 0;
pGpu->sriovState.pP2PInfo = NULL;
pGpu->sriovState.bP2PAllocated = NV_FALSE;
pGpu->sriovState.maxP2pGfid = 0;
// Include Physical function that occupies GFID 0
totalPcieFns = pGpu->sriovState.totalVFs + 1;

View File

@ -178,6 +178,13 @@ gpuInitRegistryOverrides_KERNEL
DRF_NUM(_REG_STR, _RM_GPU_FABRIC_PROBE, _OVERRIDE, 1);
}
pGpu->bBf3WarBug4040336Enabled = NV_FALSE;
if (osReadRegistryDword(pGpu, NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3,
&data32) == NV_OK)
{
pGpu->bBf3WarBug4040336Enabled = (data32 == NV_REG_STR_RM_DMA_ADJUST_PEER_MMIO_BF3_ENABLE);
}
return NV_OK;
}

View File

@ -2343,24 +2343,27 @@ kgrctxUnmapAssociatedCtxBuffers_IMPL
// channels could be using these mappings, and we must wait for both
// channels to be detached before we remove them.
//
if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->pChanList != NULL)
{
RS_ORDERED_ITERATOR it;
RsResourceRef *pScopeRef = RES_GET_REF(pKernelChannel);
CHANNEL_NODE *pChanNode;
CHANNEL_LIST *pChanList;
// Iterate over all channels in this TSG and check for duplicate VAS
if (!pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
pScopeRef = RES_GET_REF(pKernelChannel->pKernelChannelGroupApi);
pChanList = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->pChanList;
it = kchannelGetIter(RES_GET_CLIENT(pKernelChannel), pScopeRef);
while (clientRefOrderedIterNext(it.pClient, &it))
for (pChanNode = pChanList->pHead; pChanNode; pChanNode = pChanNode->pNext)
{
KernelChannel *pLoopKernelChannel = dynamicCast(it.pResourceRef->pResource, KernelChannel);
NV_ASSERT_OR_RETURN_VOID(pLoopKernelChannel != NULL);
if (pLoopKernelChannel == pKernelChannel)
// Skip the channel we are looking to unmap
if (kchannelGetDebugTag(pKernelChannel) == kchannelGetDebugTag(pChanNode->pKernelChannel))
continue;
NV_CHECK_OR_RETURN_VOID(LEVEL_SILENT, pLoopKernelChannel->pVAS != pKernelChannel->pVAS);
if (pKernelChannel->pVAS == pChanNode->pKernelChannel->pVAS)
{
NV_PRINTF(LEVEL_ERROR, "TSG %d Channel %d shares a pVAS with channel %d\n",
pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->grpID,
kchannelGetDebugTag(pKernelChannel),
kchannelGetDebugTag(pChanNode->pKernelChannel));
return;
}
}
}

View File

@ -310,32 +310,6 @@ kgspExecuteSequencerCommand_GA102
switch (opCode)
{
case GSP_SEQ_BUF_OPCODE_CORE_RESET:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
// Reset falcon
kflcnEnable_HAL(pGpu, pKernelFalcon, NV_FALSE);
kflcnEnable_HAL(pGpu, pKernelFalcon, NV_TRUE);
kflcnDisableCtxReq_HAL(pGpu, pKernelFalcon);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_START:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnStartCpu_HAL(pGpu, pKernelFalcon);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
// Wait for the bootloader to complete execution.
status = kflcnWaitForHalt_HAL(pGpu, pKernelFalcon, GPU_TIMEOUT_DEFAULT, 0);
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_RESUME:
{
RM_RISCV_UCODE_DESC *pRiscvDesc = pKernelGsp->pGspRmBootUcodeDesc;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -398,7 +398,7 @@ s_executeFwsec_TU102
{
NV_PRINTF(LEVEL_ERROR, "failed to prepare interface data for FWSEC cmd 0x%x: 0x%x\n",
cmd, status);
return status;
goto out;
}
}
else if (pFwsecUcode->bootType == KGSP_FLCN_UCODE_BOOT_WITH_LOADER)
@ -426,7 +426,7 @@ s_executeFwsec_TU102
{
NV_PRINTF(LEVEL_ERROR, "failed to prepare interface data for FWSEC cmd 0x%x: 0x%x\n",
cmd, status);
return status;
goto out;
}
}
else
@ -440,7 +440,7 @@ s_executeFwsec_TU102
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC cmd 0x%x: status 0x%x\n", cmd, status);
return status;
goto out;
}
if (cmd == FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_FRTS)
@ -456,7 +456,8 @@ s_executeFwsec_TU102
if (frtsErrCode != NV_VBIOS_FWSECLIC_FRTS_ERR_CODE_NONE)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for FRTS: FRTS error code 0x%x\n", frtsErrCode);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PFB_PRI_MMU_WPR2_ADDR_HI);
@ -464,7 +465,8 @@ s_executeFwsec_TU102
if (wpr2HiVal == 0)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for FRTS: no initialized WPR2 found\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PFB_PRI_MMU_WPR2_ADDR_LO);
@ -475,7 +477,8 @@ s_executeFwsec_TU102
NV_PRINTF(LEVEL_ERROR,
"failed to execute FWSEC for FRTS: WPR2 initialized at an unexpected location: 0x%08x (expected 0x%08x)\n",
wpr2LoVal, expectedLoVal);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
}
else // i.e. FALCON_APPLICATION_INTERFACE_DMEM_MAPPER_V3_CMD_SB
@ -487,14 +490,16 @@ s_executeFwsec_TU102
_READ_PROTECTION_LEVEL0, _ENABLE))
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: GFW PLM not lowered\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
if (!GPU_FLD_TEST_DRF_DEF(pGpu, _PGC6, _AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT,
_PROGRESS, _COMPLETED))
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: GFW progress not completed\n");
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
data = GPU_REG_RD32(pGpu, NV_PBUS_VBIOS_SCRATCH(NV_VBIOS_FWSECLIC_SCRATCH_INDEX_15));
@ -502,10 +507,17 @@ s_executeFwsec_TU102
if (sbErrCode != NV_VBIOS_FWSECLIC_SB_ERR_CODE_NONE)
{
NV_PRINTF(LEVEL_ERROR, "failed to execute FWSEC for SB: SB error code 0x%x\n", sbErrCode);
return NV_ERR_GENERIC;
status = NV_ERR_GENERIC;
goto out;
}
}
out:
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "(note: VBIOS version %s)\n", pKernelGsp->vbiosVersionStr);
}
return status;
}
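The hunk above converts each early `return` into `goto out` so that a single exit path can append the VBIOS version to every failure message. Reduced to its shape, with hypothetical sub-steps `stepA`/`stepB` and a caller-supplied version string (not driver code), the pattern looks like this:

```c
/* Hypothetical sub-steps standing in for the real FWSEC command stages. */
static NV_STATUS stepA(void);
static NV_STATUS stepB(void);

static NV_STATUS
doStepsWithVbiosContext(const char *vbiosVersionStr)
{
    NV_STATUS status;

    status = stepA();
    if (status != NV_OK)
        goto out;

    status = stepB();
    if (status != NV_OK)
        goto out;

out:
    /* Single exit: every failure path gets the same diagnostic context. */
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "(note: VBIOS version %s)\n", vbiosVersionStr);
    }
    return status;
}
```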

View File

@ -949,11 +949,12 @@ kgspWaitForGfwBootOk_TU102
}
// The wait failed if we reach here (as above loop returns upon success).
NV_PRINTF(LEVEL_ERROR, "failed to wait for GFW_BOOT: 0x%x (progress 0x%x)\n",
NV_PRINTF(LEVEL_ERROR, "failed to wait for GFW_BOOT: 0x%x (progress 0x%x, VBIOS version %s)\n",
status, GPU_REG_RD_DRF(pGpu,
_PGC6,
_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT,
_PROGRESS));
_PROGRESS),
pKernelGsp->vbiosVersionStr);
NV_PRINTF(LEVEL_ERROR, "(the GPU may be in a bad state and may need to be reset)\n");
return status;

View File

@ -2147,6 +2147,26 @@ done:
return nvStatus;
}
/*!
* Convert VBIOS version containing Version and OemVersion packed together to
* a string representation.
*
* Example:
* for Version 0x05400001, OemVersion 0x12
* input argument vbiosVersionCombined 0x0540000112
* output str "5.40.00.01.12"
*/
static void
_kgspVbiosVersionToStr(NvU64 vbiosVersionCombined, char *pVbiosVersionStr, NvU32 size)
{
nvDbgSnprintf(pVbiosVersionStr, size, "%2X.%02X.%02X.%02X.%02X",
(vbiosVersionCombined >> 32) & 0xff,
(vbiosVersionCombined >> 24) & 0xff,
(vbiosVersionCombined >> 16) & 0xff,
(vbiosVersionCombined >> 8) & 0xff,
(vbiosVersionCombined) & 0xff);
}
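As a concrete check of the field layout: the BIT BIOSDATA parsing later in this diff packs the version as `(Version << 8) | OemVersion`, and `_kgspVbiosVersionToStr` above unpacks the same five bytes for printing. The standalone snippet below (plain C and printf, purely illustrative) walks the example from the comment through both steps.

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t version    = 0x05400001;  /* BIOS binary version */
    uint8_t  oemVersion = 0x12;        /* OEM version byte    */

    /* Same packing as the BIT BIOSDATA token handling in this change. */
    uint64_t combined = ((uint64_t)version << 8) | oemVersion;  /* 0x0540000112 */

    /* Same field extraction as _kgspVbiosVersionToStr. */
    printf("%2X.%02X.%02X.%02X.%02X\n",
           (unsigned)((combined >> 32) & 0xff),
           (unsigned)((combined >> 24) & 0xff),
           (unsigned)((combined >> 16) & 0xff),
           (unsigned)((combined >>  8) & 0xff),
           (unsigned)( combined        & 0xff));   /* prints " 5.40.00.01.12" */
    return 0;
}
```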
/*!
* Initialize GSP-RM
*
@ -2200,21 +2220,34 @@ kgspInitRm_IMPL
{
KernelGspVbiosImg *pVbiosImg = NULL;
// Start VBIOS version string as "unknown"
portStringCopy(pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr), "unknown", sizeof("unknown"));
// Try and extract a VBIOS image.
status = kgspExtractVbiosFromRom_HAL(pGpu, pKernelGsp, &pVbiosImg);
if (status == NV_OK)
{
NvU64 vbiosVersionCombined = 0;
// Got a VBIOS image, now parse it for FWSEC.
status = kgspParseFwsecUcodeFromVbiosImg(pGpu, pKernelGsp, pVbiosImg,
&pKernelGsp->pFwsecUcode);
&pKernelGsp->pFwsecUcode, &vbiosVersionCombined);
kgspFreeVbiosImg(pVbiosImg);
if (vbiosVersionCombined > 0)
{
_kgspVbiosVersionToStr(vbiosVersionCombined, pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr));
}
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode from VBIOS image: 0x%x\n",
status);
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode from VBIOS image (VBIOS version %s): 0x%x\n",
pKernelGsp->vbiosVersionStr, status);
goto done;
}
NV_PRINTF(LEVEL_INFO, "parsed VBIOS version %s\n", pKernelGsp->vbiosVersionStr);
}
else if (status == NV_ERR_NOT_SUPPORTED)
{
@ -2291,7 +2324,22 @@ kgspInitRm_IMPL
goto done;
}
status = kgspCalculateFbLayout(pGpu, pKernelGsp, pGspFw);
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, _kgspInitLibosLogDecoder(pGpu, pKernelGsp, pGspFw), done);
// Wait for GFW_BOOT OK status
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
// Fail early if WPR2 is up
if (kgspIsWpr2Up_HAL(pGpu, pKernelGsp))
{
NV_PRINTF(LEVEL_ERROR, "unexpected WPR2 already up, cannot proceed with booting gsp\n");
NV_PRINTF(LEVEL_ERROR, "(the GPU is likely in a bad state and may need to be reset)\n");
status = NV_ERR_INVALID_STATE;
goto done;
}
// Calculate FB layout (requires knowing FB size which depends on GFW_BOOT)
status = kgspCalculateFbLayout_HAL(pGpu, pKernelGsp, pGspFw);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "Error calculating FB layout\n");
@ -2326,20 +2374,6 @@ kgspInitRm_IMPL
}
}
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, _kgspInitLibosLogDecoder(pGpu, pKernelGsp, pGspFw), done);
// Wait for GFW_BOOT OK status
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
// Fail early if WPR2 is up
if (kgspIsWpr2Up_HAL(pGpu, pKernelGsp))
{
NV_PRINTF(LEVEL_ERROR, "unexpected WPR2 already up, cannot proceed with booting gsp\n");
NV_PRINTF(LEVEL_ERROR, "(the GPU is likely in a bad state and may need to be reset)\n");
status = NV_ERR_INVALID_STATE;
goto done;
}
// bring up ucode with RM offload task
status = kgspBootstrapRiscvOSEarly_HAL(pGpu, pKernelGsp, pGspFw);
if (status != NV_OK)
@ -2447,6 +2481,19 @@ kgspUnloadRm_IMPL
status = kgspExecuteBooterUnloadIfNeeded_HAL(pGpu, pKernelGsp, 0);
}
//
// To fix a boot issue after GPU reset on ESXi configs:
// We still do not have a root cause, but it looks like a sanity check fails during boot after the reset.
// As a temporary WAR, add a 250 ms delay after GSP-RM unload is done.
// Limit this to [VGPU-GSP] supported configs, and only when we are in the GPU reset path.
//
if (API_GPU_IN_RESET_SANITY_CHECK(pGpu) &&
gpuIsSriovEnabled(pGpu) &&
IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
{
osDelay(250);
}
if (rpcStatus != NV_OK)
{
return rpcStatus;
@ -2469,6 +2516,9 @@ kgspDestruct_IMPL
if (!IS_GSP_CLIENT(pGpu))
return;
// set VBIOS version string back to "unknown"
portStringCopy(pKernelGsp->vbiosVersionStr, sizeof(pKernelGsp->vbiosVersionStr), "unknown", sizeof("unknown"));
kgspFreeFlcnUcode(pKernelGsp->pFwsecUcode);
pKernelGsp->pFwsecUcode = NULL;
@ -3372,6 +3422,31 @@ kgspExecuteSequencerBuffer_IMPL
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_RESET:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnReset_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
kflcnDisableCtxReq_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_START:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
kflcnStartCpu_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon));
break;
}
case GSP_SEQ_BUF_OPCODE_CORE_WAIT_FOR_HALT:
{
NV_ASSERT_OR_RETURN(payloadSize == 0, NV_ERR_INVALID_ARGUMENT);
NV_ASSERT_OK_OR_RETURN(kflcnWaitForHalt_HAL(pGpu, staticCast(pKernelGsp, KernelFalcon), GPU_TIMEOUT_DEFAULT, 0));
break;
}
default:
//
// Route this command to the arch-specific handler.

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -67,6 +67,18 @@ struct BIT_TOKEN_V1_00
#define BIT_TOKEN_V1_00_FMT "2b2w"
typedef struct BIT_TOKEN_V1_00 BIT_TOKEN_V1_00;
#define BIT_TOKEN_BIOSDATA 0x42
// structure holding only the version info from BIT_DATA_BIOSDATA_V1 and BIT_DATA_BIOSDATA_V2
typedef struct
{
bios_U032 Version; // BIOS Binary Version Ex. 5.40.00.01.12 = 0x05400001
bios_U008 OemVersion; // OEM Version Number Ex. 5.40.00.01.12 = 0x12
} BIT_DATA_BIOSDATA_BINVER;
#define BIT_DATA_BIOSDATA_BINVER_FMT "1d1b"
#define BIT_DATA_BIOSDATA_BINVER_SIZE_5 5
#define BIT_TOKEN_FALCON_DATA 0x70
typedef struct
@ -441,6 +453,7 @@ s_vbiosFindBitHeader
* @param[in] bitAddr Offset of BIT header within VBIOS image
* @param[in] bUseDebugFwsec Whether to look for debug or prod FWSEC
* @param[out] pFwsecUcodeDescFromBit Resulting ucode desc
* @param[out] pVbiosVersionCombined (optional) output VBIOS version
*/
static NV_STATUS
s_vbiosParseFwsecUcodeDescFromBit
@ -448,7 +461,8 @@ s_vbiosParseFwsecUcodeDescFromBit
const KernelGspVbiosImg * const pVbiosImg,
const NvU32 bitAddr,
const NvBool bUseDebugFwsec,
FlcnUcodeDescFromBit *pFwsecUcodeDescFromBit // out
FlcnUcodeDescFromBit *pFwsecUcodeDescFromBit, // out
NvU64 *pVbiosVersionCombined // out
)
{
@ -491,6 +505,26 @@ s_vbiosParseFwsecUcodeDescFromBit
continue;
}
// catch BIOSDATA token (for capturing VBIOS version)
if (pVbiosVersionCombined != NULL &&
bitToken.TokenId == BIT_TOKEN_BIOSDATA &&
((bitToken.DataVersion == 1) || (bitToken.DataVersion == 2)) &&
bitToken.DataSize > BIT_DATA_BIOSDATA_BINVER_SIZE_5)
{
BIT_DATA_BIOSDATA_BINVER binver;
status = s_vbiosReadStructure(pVbiosImg, &binver,
bitToken.DataPtr, BIT_DATA_BIOSDATA_BINVER_FMT);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR,
"failed to read BIOSDATA (BIT token %u), skipping: 0x%x\n",
tokIdx, status);
continue;
}
*pVbiosVersionCombined = (((NvU64) binver.Version) << 8) | ((NvU32) binver.OemVersion);
}
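For illustration, the sketch below applies the packing above to the sample values from the BINVER struct comments (Version 0x05400001, OemVersion 0x12), yielding 0x0540000112; the dotted rendering is an assumption for readability, not code taken from this driver.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Sample values from the BINVER struct comments: 5.40.00.01.12 */
    uint64_t version    = 0x05400001;                   /* BIOS binary version */
    uint64_t oemVersion = 0x12;                         /* OEM version byte    */
    uint64_t combined   = (version << 8) | oemVersion;  /* 0x0540000112        */

    /* Hypothetical dotted rendering; the driver's own formatting may differ. */
    printf("0x%010llx -> %02llx.%02llx.%02llx.%02llx.%02llx\n",
           (unsigned long long)combined,
           (unsigned long long)((combined >> 32) & 0xFF),
           (unsigned long long)((combined >> 24) & 0xFF),
           (unsigned long long)((combined >> 16) & 0xFF),
           (unsigned long long)((combined >>  8) & 0xFF),
           (unsigned long long)(combined & 0xFF));
    return 0;
}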
// skip tokens that are not for falcon ucode data v2
if (bitToken.TokenId != BIT_TOKEN_FALCON_DATA ||
bitToken.DataVersion != 2 ||
@ -1037,10 +1071,11 @@ s_vbiosNewFlcnUcodeFromDesc
* The resulting KernelGspFlcnUcode should be freed with kgspFreeFlcnUcode
* after use.
*
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelGsp KernelGsp pointer
* @param[in] pVbiosImg VBIOS image
* @param[out] ppFwsecUcode Pointer to resulting KernelGspFlcnUcode
* @param[in] pGpu OBJGPU pointer
* @param[in] pKernelGsp KernelGsp pointer
* @param[in] pVbiosImg VBIOS image
* @param[out] ppFwsecUcode Pointer to resulting KernelGspFlcnUcode
* @param[out] pVbiosVersionCombined (optional) pointer to output VBIOS version
*/
NV_STATUS
kgspParseFwsecUcodeFromVbiosImg_IMPL
@ -1048,7 +1083,8 @@ kgspParseFwsecUcodeFromVbiosImg_IMPL
OBJGPU *pGpu,
KernelGsp *pKernelGsp,
const KernelGspVbiosImg * const pVbiosImg,
KernelGspFlcnUcode **ppFwsecUcode // out
KernelGspFlcnUcode **ppFwsecUcode, // out
NvU64 *pVbiosVersionCombined // out
)
{
NV_STATUS status;
@ -1072,7 +1108,8 @@ kgspParseFwsecUcodeFromVbiosImg_IMPL
}
bUseDebugFwsec = kgspIsDebugModeEnabled_HAL(pGpu, pKernelGsp);
status = s_vbiosParseFwsecUcodeDescFromBit(pVbiosImg, bitAddr, bUseDebugFwsec, &fwsecUcodeDescFromBit);
status = s_vbiosParseFwsecUcodeDescFromBit(pVbiosImg, bitAddr, bUseDebugFwsec,
&fwsecUcodeDescFromBit, pVbiosVersionCombined);
if (status != NV_OK)
{
NV_PRINTF(LEVEL_ERROR, "failed to parse FWSEC ucode desc from VBIOS image: 0x%x\n", status);

View File

@ -70,7 +70,7 @@ static NV_STATUS _memUtilsAllocateUserD(OBJGPU *pGpu, MemoryManager *pMemoryMana
NvHandle hDeviceId, OBJCHANNEL *pChannel);
static NV_STATUS _memUtilsMapUserd_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager,
OBJCHANNEL *pChannel, NvHandle hClientId, NvHandle hDeviceId,
NvHandle hChannelId);
NvHandle hChannelId, NvBool bUseRmApiForBar1);
static NV_STATUS _memUtilsAllocateReductionSema(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel);
static NvU32 _ceChannelScheduleBatchWork_GM107(OBJGPU *pGpu, MemoryManager *pMemoryManager, OBJCHANNEL *pChannel,
RmPhysAddr src, NV_ADDRESS_SPACE srcAddressSpace, NvU32 srcCpuCacheAttrib,
@ -378,27 +378,28 @@ memmgrMemUtilsChannelInitialize_GM107
OBJCHANNEL *pChannel
)
{
NV_STATUS rmStatus;
NV_STATUS lockStatus;
RsClient *pRsClient;
NvHandle hClient;
NvHandle hDevice; // device handle
NvHandle hPhysMem; // memory handle
NvU64 size;
NvHandle hChannel; // channel handle
NvHandle hErrNotifierVirt;
NvHandle hErrNotifierPhys;
NvHandle hPushBuffer;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
Heap *pHeap = GPU_GET_HEAP(pGpu);
NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
NvU8 *pErrNotifierCpuVA = NULL;
NV_ADDRESS_SPACE userdAddrSpace;
NV_ADDRESS_SPACE pushBuffAddrSpace;
NV_ADDRESS_SPACE gpFifoAddrSpace;
OBJSYS *pSys = SYS_GET_INSTANCE();
OBJCL *pCl = SYS_GET_CL(pSys);
NvU32 cacheSnoopFlag = 0 ;
NV_STATUS rmStatus;
NV_STATUS lockStatus;
RsClient *pRsClient;
NvHandle hClient;
NvHandle hDevice;
NvHandle hPhysMem;
NvU64 size;
NvHandle hChannel;
NvHandle hErrNotifierVirt;
NvHandle hErrNotifierPhys;
NvHandle hPushBuffer;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
Heap *pHeap = GPU_GET_HEAP(pGpu);
NvBool bMIGInUse = IS_MIG_IN_USE(pGpu);
NvU8 *pErrNotifierCpuVA = NULL;
NV_ADDRESS_SPACE userdAddrSpace;
NV_ADDRESS_SPACE pushBuffAddrSpace;
NV_ADDRESS_SPACE gpFifoAddrSpace;
OBJSYS *pSys = SYS_GET_INSTANCE();
OBJCL *pCl = SYS_GET_CL(pSys);
NvU32 cacheSnoopFlag = 0 ;
NvBool bUseRmApiForBar1 = NV_FALSE;
//
// Heap alloc one chunk of memory to hold all of our alloc parameters to
@ -858,46 +859,64 @@ memmgrMemUtilsChannelInitialize_GM107
rmStatus,
LEVEL_ERROR,
_memUtilsMapUserd_GM107(pGpu, pMemoryManager, pChannel,
hClient, hDevice, hChannel),
hClient, hDevice, hChannel, bUseRmApiForBar1),
exit_free_client);
//
// map cpu pointer
// Map the pushbuffer memory to CPU viewable region
//
NV_CHECK_OK_OR_GOTO(
rmStatus,
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClient,
hDevice,
hPhysMem,
0,
size,
(void **)&pChannel->pbCpuVA,
0),
exit_free_client);
// Set up pushbuffer and semaphore memdesc and memset the buffer
pChannel->pChannelBufferMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hPhysMem);
NV_ASSERT_OR_GOTO(pChannel->pChannelBufferMemdesc != NULL, exit_free_client);
portMemSet(pChannel->pbCpuVA, 0, (NvLength)size);
// Set up notifier memory
pChannel->pErrNotifierMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, pChannel->hClient, hErrNotifierPhys);
NV_ASSERT_OR_GOTO(pChannel->pErrNotifierMemdesc != NULL, exit_free_client);
// Map the notifier memory to CPU viewable region
NV_CHECK_OK_OR_GOTO(
rmStatus,
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClient,
hDevice,
hErrNotifierPhys,
0,
pChannel->channelNotifierSize,
(void **)&pErrNotifierCpuVA,
0),
exit_free_client);
if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
{
rmStatus = memmgrMemDescMemSet(pMemoryManager, pChannel->pChannelBufferMemdesc, 0,
(TRANSFER_FLAGS_SHADOW_ALLOC | TRANSFER_FLAGS_SHADOW_INIT_MEM));
NV_ASSERT_OR_GOTO(rmStatus == NV_OK, exit_free_client);
pChannel->pTokenFromNotifier =
(NvNotification *)(pErrNotifierCpuVA +
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
sizeof(NvNotification)));
pChannel->pbCpuVA = NULL;
pChannel->pTokenFromNotifier = NULL;
}
else
{
if (bUseRmApiForBar1)
{
NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClient, hDevice, hPhysMem, 0, size,
(void **)&pChannel->pbCpuVA, 0),
exit_free_client);
NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClient, hDevice, hErrNotifierPhys, 0,
pChannel->channelNotifierSize, (void **)&pErrNotifierCpuVA, 0),
exit_free_client);
}
else
{
//
// Most use cases can migrate to the internal memdescMap path for BAR1,
// which is preferred because the external path will not work with CC.
//
pChannel->pbCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_GOTO(pChannel->pbCpuVA != NULL, exit_free_client);
pErrNotifierCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
pChannel->pErrNotifierMemdesc, TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_GOTO(pErrNotifierCpuVA != NULL, exit_free_client);
}
portMemSet(pChannel->pbCpuVA, 0, (NvLength)size);
pChannel->pTokenFromNotifier =
(NvNotification *)(pErrNotifierCpuVA +
(NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN *
sizeof(NvNotification)));
}
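For context, a minimal sketch of the mapping lifecycle the internal BAR1 path above relies on: a memmgrMemDescBeginTransfer(..., TRANSFER_FLAGS_USE_BAR1) mapping is presumed to be released with a matching memmgrMemDescEndTransfer call once CPU access is done; that teardown call and its flags are assumptions here, not something shown in this hunk (channelPbSize is a field referenced later in this diff).

/*
 * Hedged sketch of a paired BAR1 transfer mapping, mirroring the
 * Begin call used above.  The End call and its flags are assumed.
 */
static NV_STATUS s_exampleClearChannelBuffer(MemoryManager *pMemoryManager, OBJCHANNEL *pChannel)
{
    NvU8 *pCpuVA = memmgrMemDescBeginTransfer(pMemoryManager,
                       pChannel->pChannelBufferMemdesc, TRANSFER_FLAGS_USE_BAR1);
    NV_ASSERT_OR_RETURN(pCpuVA != NULL, NV_ERR_GENERIC);

    /* Touch the buffer through the BAR1 mapping. */
    portMemSet(pCpuVA, 0, (NvLength)pChannel->channelPbSize);

    /* Assumed matching teardown for the Begin call above. */
    memmgrMemDescEndTransfer(pMemoryManager, pChannel->pChannelBufferMemdesc,
                             TRANSFER_FLAGS_USE_BAR1);
    return NV_OK;
}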
//
// Allocate and map the doorbell region to use in scrub on free
@ -1188,7 +1207,12 @@ memmgrMemUtilsCopyEngineInitialize_GM107
// initialize the channel parameters (should be done by the parent object)
pChannel->channelPutOffset = 0;
MEM_WR32(pChannel->pbCpuVA + pChannel->semaOffset, 0);
if (pChannel->pbCpuVA != NULL)
{
MEM_WR32(pChannel->pbCpuVA + pChannel->semaOffset, 0);
}
return NV_OK;
exit_free:
@ -1300,25 +1324,48 @@ _memUtilsMapUserd_GM107
OBJCHANNEL *pChannel,
NvHandle hClientId,
NvHandle hDeviceId,
NvHandle hChannelId
NvHandle hChannelId,
NvBool bUseRmApiForBar1
)
{
NvU32 userdSize;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
//
// The memTransfer API only works for client-allocated USERD;
// otherwise MapToCpu is called with the channel handle instead.
//
if (pChannel->bClientUserd && !bUseRmApiForBar1)
{
pChannel->pUserdMemdesc =
memmgrMemUtilsGetMemDescFromHandle(pMemoryManager, hClientId, pChannel->hUserD);
NV_ASSERT_OR_RETURN(pChannel->pUserdMemdesc != NULL, NV_ERR_GENERIC);
kfifoGetUserdSizeAlign_HAL(GPU_GET_KERNEL_FIFO(pGpu), &userdSize, NULL);
NV_CHECK_OK_OR_RETURN(
LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi,
hClientId,
hDeviceId,
pChannel->bClientUserd ? pChannel->hUserD : hChannelId,
0,
userdSize,
(void **)&pChannel->pControlGPFifo,
0));
if (kbusIsBarAccessBlocked(GPU_GET_KERNEL_BUS(pGpu)))
{
//
// GPFIFO access will not be set up in order to facilitate memTransfer APIs
// which will use GSP-DMA/CE with shadow buffers
//
pChannel->pControlGPFifo = NULL;
}
else
{
pChannel->pControlGPFifo =
(void *)memmgrMemDescBeginTransfer(pMemoryManager, pChannel->pUserdMemdesc,
TRANSFER_FLAGS_USE_BAR1);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
}
}
else
{
NvU32 userdSize = 0;
RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
kfifoGetUserdSizeAlign_HAL(GPU_GET_KERNEL_FIFO(pGpu), &userdSize, NULL);
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
pRmApi->MapToCpu(pRmApi, hClientId, hDeviceId,
pChannel->bClientUserd ? pChannel->hUserD : hChannelId, 0,
userdSize, (void **)&pChannel->pControlGPFifo, 0));
}
return NV_OK;
}
@ -1522,6 +1569,10 @@ memmgrMemUtilsMemSet_GM107
return NV_ERR_GENERIC;
}
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
if (pChannel->isProgressChecked)
{
// if progress is checked insert the semaphore with freeToken as payload
@ -1579,6 +1630,9 @@ memmgrMemUtilsMemSetBlocking_GM107
}
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
blocksPushed = _ceChannelScheduleWork_GM107(pGpu, pMemoryManager, pChannel,
0, 0, 0, // src parameters
base, ADDR_FBMEM, 0, // dst parameters
@ -1644,6 +1698,9 @@ memmgrMemUtilsMemSetBatched_GM107
{
NvU32 blocksPushed = 0;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
blocksPushed = _ceChannelScheduleBatchWork_GM107(pGpu, pMemoryManager, pChannel,
0, 0, 0, // src parameters
base, ADDR_FBMEM, 0, // dst parameters
@ -1720,6 +1777,9 @@ memmgrMemUtilsMemCopyBatched_GM107
NvU64 size
)
{
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, NV_ERR_GENERIC);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, NV_ERR_GENERIC);
NvU32 blocksPushed = _ceChannelScheduleBatchWork_GM107(pGpu, pMemoryManager, pChannel,
src, srcAddressSpace, srcCpuCacheAttrib, // src parameters
dst, dstAddressSpace, dstCpuCacheAttrib, // dst parameters
@ -1897,6 +1957,8 @@ _getSpaceInPb(OBJCHANNEL *pChannel)
NvU32 filledSpace;
NvU32 avlblSpace;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
if (pChannel->channelPutOffset >= MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset))
{
filledSpace = (pChannel->channelPutOffset - MEM_RD32((NvU8*)pChannel->pbCpuVA + pChannel->semaOffset));
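As a side note on the arithmetic above, here is a self-contained sketch of the pushbuffer space computation using a PUT offset and a GET value read back via the semaphore; the wrapped case and the absence of any reserved-slot adjustment are assumptions, since only the non-wrapped branch is visible in this hunk.

#include <stdio.h>
#include <stdint.h>

/* Generic ring-space arithmetic mirroring _getSpaceInPb above (sketch only). */
static uint32_t spaceInPb(uint32_t pbSize, uint32_t put, uint32_t get)
{
    uint32_t filled = (put >= get) ? (put - get)
                                   : (pbSize - get + put);  /* wrapped case (assumed) */
    return pbSize - filled;
}

int main(void)
{
    /* Example: 4 KB pushbuffer, PUT at 0x300, GET at 0x100 -> 0x200 filled, 0xE00 free. */
    printf("free = 0x%x\n", spaceInPb(0x1000, 0x300, 0x100));
    return 0;
}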
@ -1952,6 +2014,8 @@ _ceChannelScheduleBatchWork_GM107
spaceInPb = pChannel->channelPbSize - pChannel->channelPutOffset;
NV_ASSERT_OR_RETURN(spaceInPb >= pChannel->methodSizePerBlock, 0);
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, 0);
// Support for sending semaphore-release-only work.
if (size > 0)
@ -2046,6 +2110,9 @@ _ceChannelScheduleWork_GM107
NvBool addFinishPayload;
NvU32 blockSize = 0;
NV_ASSERT_OR_RETURN(pChannel->pbCpuVA != NULL, 0);
NV_ASSERT_OR_RETURN(pChannel->pControlGPFifo != NULL, 0);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
spaceInPb = _getSpaceInPb(pChannel);
@ -2224,6 +2291,9 @@ _ceChannelUpdateGpFifo_GM107
KernelChannel *pFifoKernelChannel;
KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
NV_ASSERT_OR_RETURN_VOID(pChannel->pbCpuVA != NULL);
NV_ASSERT_OR_RETURN_VOID(pChannel->pControlGPFifo != NULL);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
GPPut = MEM_RD32(&pChannel->pControlGPFifo->GPPut);
GPGet = MEM_RD32(&pChannel->pControlGPFifo->GPGet);

Some files were not shown because too many files have changed in this diff.