535.113.01
This commit is contained in:
parent
a8e01be6b2
commit
f59818b751
@ -2,6 +2,12 @@
|
||||
|
||||
## Release 535 Entries
|
||||
|
||||
### [535.113.01] 2023-09-21
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed a build failure of main against the current CentOS Stream 8, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied
|
||||
|
||||
### [535.104.05] 2023-08-22
|
||||
|
||||
### [535.98] 2023-08-08
|
||||
|
12
README.md
12
README.md
@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 535.104.05.
|
||||
version 535.113.01.
|
||||
|
||||
|
||||
## How to Build
|
||||
@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
535.104.05 driver release. This can be achieved by installing
|
||||
535.113.01 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@ -180,7 +180,7 @@ software applications.
|
||||
## Compatible GPUs
|
||||
|
||||
The open-gpu-kernel-modules can be used on any Turing or later GPU
|
||||
(see the table below). However, in the 535.104.05 release,
|
||||
(see the table below). However, in the 535.113.01 release,
|
||||
GeForce and Workstation support is still considered alpha-quality.
|
||||
|
||||
To enable use of the open kernel modules on GeForce and Workstation GPUs,
|
||||
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
|
||||
parameter to 1. For more details, see the NVIDIA GPU driver end user
|
||||
README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.104.05/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.113.01/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@ -856,6 +856,10 @@ Subsystem Device ID.
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
|
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.104.05\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.113.01\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
|
@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
void* NV_API_CALL os_get_pid_info(void);
|
||||
void NV_API_CALL os_put_pid_info(void *pid_info);
|
||||
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
|
@ -5636,23 +5636,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
migrate_vma_setup)
|
||||
#
|
||||
# Determine if migrate_vma_setup() function is present
|
||||
#
|
||||
# migrate_vma_setup() function was added by commit
|
||||
# a7d1f22bb74f32cf3cd93f52776007e161f1a738 ("mm: turn migrate_vma
|
||||
# upside down") in v5.4.
|
||||
# (2019-08-20).
|
||||
CODE="
|
||||
#include <linux/migrate.h>
|
||||
int conftest_migrate_vma_setup(void) {
|
||||
migrate_vma_setup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
migrate_vma_added_flags)
|
||||
#
|
||||
# Determine if migrate_vma structure has flags
|
||||
@ -5743,23 +5726,25 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_IOASID_GET_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
mm_pasid_set)
|
||||
mm_pasid_drop)
|
||||
#
|
||||
# Determine if mm_pasid_set() function is present
|
||||
# Determine if mm_pasid_drop() function is present
|
||||
#
|
||||
# Added by commit 701fac40384f ("iommu/sva: Assign a PASID to mm
|
||||
# on PASID allocation and free it on mm exit") in v5.18.
|
||||
# Moved to linux/iommu.h in commit cd3891158a77 ("iommu/sva: Move
|
||||
# PASID helpers to sva code") in v6.4.
|
||||
#
|
||||
# mm_pasid_set() function was added by commit
|
||||
# 701fac40384f07197b106136012804c3cae0b3de (iommu/sva: Assign a
|
||||
# PASID to mm on PASID allocation and free it on mm exit) in v5.18.
|
||||
# (2022-02-15).
|
||||
CODE="
|
||||
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
|
||||
#include <linux/sched/mm.h>
|
||||
#endif
|
||||
void conftest_mm_pasid_set(void) {
|
||||
mm_pasid_set();
|
||||
#include <linux/iommu.h>
|
||||
void conftest_mm_pasid_drop(void) {
|
||||
mm_pasid_drop();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MM_PASID_SET_PRESENT" "" "functions"
|
||||
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_crtc_state_has_no_vblank)
|
||||
@ -6341,6 +6326,22 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
|
||||
;;
|
||||
|
||||
mpol_preferred_many_present)
|
||||
#
|
||||
# Determine if MPOL_PREFERRED_MANY enum is present or not
|
||||
#
|
||||
# Added by commit b27abaccf8e8b ("mm/mempolicy: add
|
||||
# MPOL_PREFERRED_MANY for multiple preferred nodes") in
|
||||
# v5.15
|
||||
#
|
||||
CODE="
|
||||
#include <linux/mempolicy.h>
|
||||
int mpol_preferred_many = MPOL_PREFERRED_MANY;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MPOL_PREFERRED_MANY_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
mmu_interval_notifier)
|
||||
#
|
||||
# Determine if mmu_interval_notifier struct is present or not
|
||||
|
@ -81,8 +81,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
|
||||
@ -110,6 +109,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018 NVIDIA Corporation
|
||||
Copyright (c) 2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -149,7 +149,11 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
mode = vma_policy->mode;
|
||||
|
||||
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
|
||||
if ((mode == MPOL_BIND)
|
||||
#if defined(NV_MPOL_PREFERRED_MANY_PRESENT)
|
||||
|| (mode == MPOL_PREFERRED_MANY)
|
||||
#endif
|
||||
|| (mode == MPOL_PREFERRED)) {
|
||||
int home_node = NUMA_NO_NODE;
|
||||
|
||||
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
|
||||
@ -467,6 +471,10 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
|
||||
else
|
||||
uvm_page_mask_zero(write_fault_mask);
|
||||
|
||||
// There are no pending faults beyond write faults to RO region.
|
||||
if (uvm_page_mask_empty(read_fault_mask))
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
@ -32,19 +32,23 @@
|
||||
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
|
||||
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
|
||||
// conftest-able. We instead look for the presence of ioasid_get() or
|
||||
// mm_pasid_set(). ioasid_get() was added in the same patch series as
|
||||
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
|
||||
// mm_pasid_drop(). ioasid_get() was added in the same patch series as
|
||||
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_drop() was added in the
|
||||
// same patch as the removal of ioasid_get(). We assume the presence of
|
||||
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
|
||||
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_drop(v5.18+) is
|
||||
// present.
|
||||
//
|
||||
// arm_smmu_sva_bind() was added with commit
|
||||
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
|
||||
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
|
||||
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
|
||||
// and added mm_pasid_set().
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
|
||||
#define UVM_ATS_SVA_SUPPORTED() 1
|
||||
// and added mm_pasid_drop().
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
|
||||
#if defined(CONFIG_IOMMU_SVA)
|
||||
#define UVM_ATS_SVA_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_ATS_SVA_SUPPORTED() 0
|
||||
#endif
|
||||
#else
|
||||
#define UVM_ATS_SVA_SUPPORTED() 0
|
||||
#endif
|
||||
|
@ -191,7 +191,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
|
||||
|
||||
for (i = 0; i < REDUCTIONS; ++i) {
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
|
||||
}
|
||||
|
||||
// Without a sys membar the channel tracking semaphore can and does complete
|
||||
@ -577,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
|
||||
|
||||
for (i = 0; i < REDUCTIONS; i++) {
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
|
||||
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
|
||||
}
|
||||
|
||||
status = uvm_push_end_and_wait(&push);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -697,9 +697,6 @@ static inline int cmp_access_type(uvm_fault_access_type_t a, uvm_fault_access_ty
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Fetch a batch of faults from the buffer.
|
||||
FAULT_FETCH_MODE_BATCH_ALL,
|
||||
|
||||
// Fetch a batch of faults from the buffer. Stop at the first entry that is
|
||||
// not ready yet
|
||||
FAULT_FETCH_MODE_BATCH_READY,
|
||||
@ -857,9 +854,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
// written out of order
|
||||
UVM_SPIN_WHILE(!gpu->parent->fault_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode != FAULT_FETCH_MODE_ALL &&
|
||||
fetch_mode != FAULT_FETCH_MODE_BATCH_ALL &&
|
||||
fault_index > 0)
|
||||
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -888,6 +883,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
|
||||
current_entry->va_space = NULL;
|
||||
current_entry->filtered = false;
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
|
||||
if (current_entry->fault_source.utlb_id > batch_context->max_utlb_id) {
|
||||
UVM_ASSERT(current_entry->fault_source.utlb_id < replayable_faults->utlb_count);
|
||||
@ -1378,7 +1374,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(current_entry->fault_access_type ==
|
||||
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
|
||||
|
||||
current_entry->is_fatal = false;
|
||||
// Unserviceable faults were already skipped by the caller. There are no
|
||||
// unserviceable fault types that could be in the same VA block as a
|
||||
// serviceable fault.
|
||||
UVM_ASSERT(!current_entry->is_fatal);
|
||||
current_entry->is_throttled = false;
|
||||
current_entry->is_invalid_prefetch = false;
|
||||
|
||||
@ -1735,6 +1734,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_t access_type = current_entry->fault_access_type;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
// ATS faults can't be unserviceable, since unserviceable faults require
|
||||
// GMMU PTEs.
|
||||
UVM_ASSERT(!current_entry->is_fatal);
|
||||
|
||||
i++;
|
||||
|
||||
update_batch_and_notify_fault(gpu_va_space->gpu,
|
||||
@ -2044,8 +2047,10 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
mm = NULL;
|
||||
va_space = NULL;
|
||||
status = NV_OK;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
@ -2273,27 +2278,9 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
|
||||
return false;
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Only cancel faults flagged as fatal
|
||||
FAULT_CANCEL_MODE_FATAL,
|
||||
|
||||
// Cancel all faults in the batch unconditionally
|
||||
FAULT_CANCEL_MODE_ALL,
|
||||
} fault_cancel_mode_t;
|
||||
|
||||
// Cancel faults in the given fault service batch context. The function provides
|
||||
// two different modes depending on the value of cancel_mode:
|
||||
// - If cancel_mode == FAULT_CANCEL_MODE_FATAL, only faults flagged as fatal
|
||||
// will be cancelled. In this case, the reason reported to tools is the one
|
||||
// contained in the fault entry itself.
|
||||
// - If cancel_mode == FAULT_CANCEL_MODE_ALL, all faults will be cancelled
|
||||
// unconditionally. In this case, the reason reported to tools for non-fatal
|
||||
// faults is the one passed to this function.
|
||||
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
fault_cancel_mode_t cancel_mode,
|
||||
UvmEventFatalReason reason)
|
||||
// Cancel just the faults flagged as fatal in the given fault service batch
|
||||
// context.
|
||||
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS fault_status;
|
||||
@ -2301,8 +2288,6 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
|
||||
if (cancel_mode == FAULT_CANCEL_MODE_ALL)
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
@ -2320,12 +2305,66 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// We don't need to check whether a buffer flush is required
|
||||
// (due to VA range destruction).
|
||||
// - For cancel_mode == FAULT_CANCEL_MODE_FATAL, once a fault is
|
||||
// flagged as fatal we need to cancel it, even if its VA range no
|
||||
// longer exists.
|
||||
// - For cancel_mode == FAULT_CANCEL_MODE_ALL we don't care about
|
||||
// any of this, we just want to trigger RC in RM.
|
||||
// (due to VA range destruction). Once a fault is flagged as fatal
|
||||
// we need to cancel it, even if its VA range no longer exists.
|
||||
}
|
||||
|
||||
// See the comment for the same check in cancel_faults_all
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id))
|
||||
continue;
|
||||
|
||||
if (current_entry->is_fatal) {
|
||||
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (va_space != NULL)
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// See the comment on flushing in cancel_faults_all
|
||||
fault_status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
|
||||
// We report the first encountered error.
|
||||
if (status == NV_OK)
|
||||
status = fault_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Cancel all faults in the given fault service batch context, even those not
|
||||
// marked as fatal.
|
||||
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
UvmEventFatalReason reason)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS fault_status;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode;
|
||||
|
||||
UVM_ASSERT(current_entry->va_space);
|
||||
|
||||
if (current_entry->va_space != va_space) {
|
||||
// Fault on a different va_space, drop the lock of the old one...
|
||||
if (va_space != NULL)
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
va_space = current_entry->va_space;
|
||||
|
||||
// ... and take the lock of the new one
|
||||
uvm_va_space_down_read(va_space);
|
||||
}
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
|
||||
@ -2337,32 +2376,28 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cancel the fault
|
||||
if (cancel_mode == FAULT_CANCEL_MODE_ALL || current_entry->is_fatal) {
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode = current_entry->replayable.cancel_va_mode;
|
||||
|
||||
// If cancelling unconditionally and the fault was not fatal,
|
||||
// set the cancel reason passed to this function
|
||||
if (!current_entry->is_fatal) {
|
||||
current_entry->fatal_reason = reason;
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
|
||||
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
// If the fault was already marked fatal, use its reason and cancel
|
||||
// mode. Otherwise use the provided reason.
|
||||
if (current_entry->is_fatal) {
|
||||
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
|
||||
cancel_va_mode = current_entry->replayable.cancel_va_mode;
|
||||
}
|
||||
else {
|
||||
current_entry->fatal_reason = reason;
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
|
||||
status = cancel_fault_precise_va(gpu, current_entry, cancel_va_mode);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (va_space != NULL)
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// After cancelling the fatal faults, the fault buffer is flushed to remove
|
||||
// any potential duplicated fault that may have been added while processing
|
||||
// the faults in this batch. This flush also avoids doing unnecessary
|
||||
// processing after the fatal faults have been cancelled, so all the rest
|
||||
// are unlikely to remain after a replay because the context is probably in
|
||||
// the process of dying.
|
||||
// Because each cancel itself triggers a replay, there may be a large number
|
||||
// of new duplicated faults in the buffer after cancelling all the known
|
||||
// ones. Flushing the buffer discards them to avoid unnecessary processing.
|
||||
fault_status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
@ -2410,12 +2445,12 @@ static void cancel_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
UvmEventFatalReason reason)
|
||||
{
|
||||
if (gpu->parent->fault_cancel_va_supported) {
|
||||
cancel_faults_precise_va(gpu, batch_context, FAULT_CANCEL_MODE_ALL, reason);
|
||||
return;
|
||||
}
|
||||
|
||||
cancel_fault_batch_tlb(gpu, batch_context, reason);
|
||||
// Return code is ignored since we're on a global error path and wouldn't be
|
||||
// able to recover anyway.
|
||||
if (gpu->parent->fault_cancel_va_supported)
|
||||
cancel_faults_all(gpu, batch_context, reason);
|
||||
else
|
||||
cancel_fault_batch_tlb(gpu, batch_context, reason);
|
||||
}
|
||||
|
||||
|
||||
@ -2582,12 +2617,8 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
|
||||
{
|
||||
UVM_ASSERT(batch_context->has_fatal_faults);
|
||||
if (gpu->parent->fault_cancel_va_supported) {
|
||||
return cancel_faults_precise_va(gpu,
|
||||
batch_context,
|
||||
FAULT_CANCEL_MODE_FATAL,
|
||||
UvmEventFatalReasonInvalid);
|
||||
}
|
||||
if (gpu->parent->fault_cancel_va_supported)
|
||||
return cancel_faults_precise_va(gpu, batch_context);
|
||||
|
||||
return cancel_faults_precise_tlb(gpu, batch_context);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -368,7 +368,10 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_hopper(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
NvU32 depth,
|
||||
uvm_page_directory_t *child_dir)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_hopper(depth);
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
@ -153,10 +153,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#define VM_MIXEDMAP 0x00000000
|
||||
#endif
|
||||
|
||||
#if !defined(MPOL_PREFERRED_MANY)
|
||||
#define MPOL_PREFERRED_MANY 5
|
||||
#endif
|
||||
|
||||
//
|
||||
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
|
||||
// routines pick up our version
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -106,7 +106,10 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_maxwell(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_maxwell(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
NvU32 depth,
|
||||
uvm_page_directory_t *child_dir)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
UVM_ASSERT(depth == 0);
|
||||
|
@ -51,7 +51,7 @@ typedef struct
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#else
|
||||
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#endif
|
||||
#endif
|
||||
|
@ -323,37 +323,153 @@ static void uvm_mmu_page_table_cpu_memset_16(uvm_gpu_t *gpu,
|
||||
uvm_mmu_page_table_cpu_unmap(gpu, phys_alloc);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
for (i = 0; i < pde_count; i++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i]);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, &directory->phys_alloc, start_index + i, pde_data, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
|
||||
NvU32 max_inline_entries;
|
||||
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
NvU32 entry_count, i, j;
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
// All but the first memory operation can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
// No membar is needed until the last memory operation. Otherwise,
|
||||
// use caller's membar flag.
|
||||
if ((i + entry_count) < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
|
||||
uvm_push_set_flag(push, push_membar_flag);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory->depth, directory->entries[start_index + i + j]);
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
|
||||
}
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * entry_size);
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += entry_size * entry_count;
|
||||
}
|
||||
}
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
// pde_fill() is optimized for pde_count == 1, which is the common case. The
|
||||
// map_remap() function is the only case where pde_count > 1, only used on GA100
|
||||
// GPUs for 512MB page size mappings.
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, directory->depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
|
||||
{
|
||||
NvU64 clear_bits[2];
|
||||
uvm_mmu_mode_hal_t *hal = tree->hal;
|
||||
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
|
||||
|
||||
if (dir->depth == tree->hal->page_table_depth(page_size)) {
|
||||
*clear_bits = 0; // Invalid PTE
|
||||
}
|
||||
else {
|
||||
// passing in NULL for the phys_allocs will mark the child entries as invalid
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
hal->make_pde(clear_bits, phys_allocs, dir->depth);
|
||||
// Passing in NULL for the phys_allocs will mark the child entries as
|
||||
// invalid.
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
|
||||
// Make sure that using only clear_bits[0] will work
|
||||
UVM_ASSERT(hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
// Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
|
||||
// than 512 entries. We initialize them all with the same clean PDE.
|
||||
// Additionally, only ATS systems may require clean PDE bit settings based
|
||||
// on the mapping VA.
|
||||
if (dir->depth == tree->hal->page_table_depth(page_size) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
|
||||
NvU64 clear_bits[2];
|
||||
|
||||
// initialize the memory to a reasonable value
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
// If it is not a PTE, make a clean PDE.
|
||||
if (dir->depth != tree->hal->page_table_depth(page_size)) {
|
||||
tree->hal->make_pde(clear_bits, phys_allocs, dir->depth, dir->entries[0]);
|
||||
|
||||
// Make sure that using only clear_bits[0] will work.
|
||||
UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
else {
|
||||
*clear_bits = 0;
|
||||
}
|
||||
|
||||
// Initialize the memory to a reasonable value.
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
@ -367,8 +483,10 @@ static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
NvLength phys_alloc_size = hal->allocation_size(depth, page_size);
|
||||
uvm_page_directory_t *dir;
|
||||
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not page_size.
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries
|
||||
// that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not
|
||||
// page_size.
|
||||
if (depth == hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC))
|
||||
entry_count = 0;
|
||||
else
|
||||
@ -409,108 +527,6 @@ static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, N
|
||||
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, directory, start_index, pde_data[0], pde_count);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, directory, start_index, pde_data, pde_count);
|
||||
}
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->addr);
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
if (entry_size == sizeof(pde_data[0])) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push, pde_entry_addr, pde_data[0], sizeof(pde_data[0]) * pde_count);
|
||||
}
|
||||
else {
|
||||
NvU32 max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / sizeof(pde_data);
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
NvU32 membar_flag = 0;
|
||||
NvU32 i;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
NvU32 j;
|
||||
NvU32 entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++)
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, sizeof(pde_data));
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
// All but the first memcopy can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
// No membar is needed until the last copy. Otherwise, use
|
||||
// caller's membar flag.
|
||||
if (i + entry_count < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (membar_flag)
|
||||
uvm_push_set_flag(push, membar_flag);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * sizeof(pde_data));
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += sizeof(pde_data) * entry_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, depth, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, depth, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *host_pde_write(uvm_page_directory_t *dir,
|
||||
uvm_page_directory_t *parent,
|
||||
NvU32 index_in_parent)
|
||||
@ -540,7 +556,7 @@ static void pde_write(uvm_page_tree_t *tree,
|
||||
phys_allocs[i] = &entry->phys_alloc;
|
||||
}
|
||||
|
||||
pde_fill(tree, dir->depth, &dir->phys_alloc, entry_index, 1, phys_allocs, push);
|
||||
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
|
||||
}
|
||||
|
||||
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
|
||||
@ -813,8 +829,11 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
|
||||
|
||||
static void map_remap_deinit(uvm_page_tree_t *tree)
|
||||
{
|
||||
if (tree->map_remap.pde0.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0);
|
||||
if (tree->map_remap.pde0) {
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0->phys_alloc);
|
||||
uvm_kvfree(tree->map_remap.pde0);
|
||||
tree->map_remap.pde0 = NULL;
|
||||
}
|
||||
|
||||
if (tree->map_remap.ptes_invalid_4k.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.ptes_invalid_4k);
|
||||
@ -839,10 +858,16 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
// PDE1-depth(512M) PTE. We first map it to the pde0 directory, then we
|
||||
// return the PTE for the get_ptes()'s caller.
|
||||
if (tree->hal->page_sizes() & UVM_PAGE_SIZE_512M) {
|
||||
status = allocate_page_table(tree, UVM_PAGE_SIZE_2M, &tree->map_remap.pde0);
|
||||
if (status != NV_OK)
|
||||
tree->map_remap.pde0 = allocate_directory(tree,
|
||||
UVM_PAGE_SIZE_2M,
|
||||
tree->hal->page_table_depth(UVM_PAGE_SIZE_2M),
|
||||
UVM_PMM_ALLOC_FLAGS_EVICT);
|
||||
if (tree->map_remap.pde0 == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "map remap init");
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
@ -864,22 +889,23 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
NvU32 depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_4K) - 1;
|
||||
size_t index_4k = tree->hal->entry_offset(depth, UVM_PAGE_SIZE_4K);
|
||||
|
||||
// pde0 depth equals the page table depth of UVM_PAGE_SIZE_2M.
|
||||
NvU32 pde0_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_2M);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0.size / tree->hal->entry_size(pde0_depth);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0->phys_alloc.size / tree->hal->entry_size(tree->map_remap.pde0->depth);
|
||||
|
||||
// The big-page entry is NULL which makes it an invalid entry.
|
||||
phys_allocs[index_4k] = &tree->map_remap.ptes_invalid_4k;
|
||||
|
||||
// By default CE operations include a MEMBAR_SYS. MEMBAR_GPU is
|
||||
// sufficient when pde0 is allocated in VIDMEM.
|
||||
if (tree->map_remap.pde0.addr.aperture == UVM_APERTURE_VID)
|
||||
if (tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
// This is an orphan directory, make_pde() requires a directory to
|
||||
// compute the VA. The UVM depth map_remap() operates on is not in the
|
||||
// range make_pde() must operate. We only need to supply the fields used
|
||||
// by make_pde() to not access invalid memory addresses.
|
||||
|
||||
pde_fill(tree,
|
||||
pde0_depth,
|
||||
&tree->map_remap.pde0,
|
||||
tree->map_remap.pde0,
|
||||
0,
|
||||
pde0_entries,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_allocs,
|
||||
@ -1332,10 +1358,9 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
if (uvm_page_table_range_aperture(range) == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
phys_alloc[0] = &tree->map_remap.pde0;
|
||||
phys_alloc[0] = &tree->map_remap.pde0->phys_alloc;
|
||||
pde_fill(tree,
|
||||
range->table->depth,
|
||||
&range->table->phys_alloc,
|
||||
range->table,
|
||||
range->start_index,
|
||||
range->entry_count,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_alloc,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -219,7 +219,7 @@ struct uvm_mmu_mode_hal_struct
|
||||
// point to two items for dual PDEs).
|
||||
// any of allocs are allowed to be NULL, in which case they are to be
|
||||
// treated as empty.
|
||||
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth);
|
||||
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth, uvm_page_directory_t *child_dir);
|
||||
|
||||
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
|
||||
// (in the case of Pascal dual PDEs)
|
||||
@ -229,7 +229,7 @@ struct uvm_mmu_mode_hal_struct
|
||||
NvU32 (*entries_per_index)(NvU32 depth);
|
||||
|
||||
// For dual PDEs, this is either 1 or 0, depending on the page size.
|
||||
// This is used to index the host copy only. GPU PDEs are always entirely
|
||||
// This is used to index the host copy only. GPU PDEs are always entirely
|
||||
// re-written using make_pde.
|
||||
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
|
||||
|
||||
@ -295,9 +295,8 @@ struct uvm_page_tree_struct
|
||||
|
||||
// PDE0 where all big-page entries are invalid, and small-page entries
|
||||
// point to ptes_invalid_4k.
|
||||
// pde0 is only used on Pascal-Ampere, i.e., they have the same PDE
|
||||
// format.
|
||||
uvm_mmu_page_table_alloc_t pde0;
|
||||
// pde0 is used on Pascal+ GPUs, i.e., they have the same PDE format.
|
||||
uvm_page_directory_t *pde0;
|
||||
} map_remap;
|
||||
|
||||
// Tracker for all GPU operations on the tree
|
||||
@ -365,21 +364,32 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
|
||||
// the same page size without an intervening put_ptes. To duplicate a subset of
|
||||
// an existing range or change the size of an existing range, use
|
||||
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
|
||||
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *range);
|
||||
|
||||
// Same as uvm_page_tree_get_ptes(), but doesn't synchronize the GPU work.
|
||||
//
|
||||
// All pending operations can be waited on with uvm_page_tree_wait().
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *range);
|
||||
|
||||
// Returns a single-entry page table range for the addresses passed.
|
||||
// The size parameter must be a page size supported by this tree.
|
||||
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
|
||||
// page_size.
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *single);
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single);
|
||||
|
||||
// For a single-entry page table range, write the PDE (which could be a dual
|
||||
// PDE) to the GPU.
|
||||
@ -478,8 +488,8 @@ NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
|
||||
// new_range_vec will contain the upper portion of range_vec, starting at
|
||||
// new_end + 1.
|
||||
//
|
||||
// new_end + 1 is required to be within the address range of range_vec and be aligned to
|
||||
// range_vec's page_size.
|
||||
// new_end + 1 is required to be within the address range of range_vec and be
|
||||
// aligned to range_vec's page_size.
|
||||
//
|
||||
// On failure, the original range vector is left unmodified.
|
||||
NV_STATUS uvm_page_table_range_vec_split_upper(uvm_page_table_range_vec_t *range_vec,
|
||||
@ -501,18 +511,22 @@ void uvm_page_table_range_vec_destroy(uvm_page_table_range_vec_t *range_vec);
|
||||
// for each offset.
|
||||
// The caller_data pointer is what the caller passed in as caller_data to
|
||||
// uvm_page_table_range_vec_write_ptes().
|
||||
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
|
||||
void *caller_data);
|
||||
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec,
|
||||
NvU64 offset,
|
||||
void *caller_data);
|
||||
|
||||
// Write all PTEs covered by the range vector using the given PTE making function.
|
||||
// Write all PTEs covered by the range vector using the given PTE making
|
||||
// function.
|
||||
//
|
||||
// After writing all the PTEs a TLB invalidate operation is performed including
|
||||
// the passed in tlb_membar.
|
||||
//
|
||||
// See comments about uvm_page_table_range_pte_maker_t for details about the
|
||||
// PTE making callback.
|
||||
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec, uvm_membar_t tlb_membar,
|
||||
uvm_page_table_range_pte_maker_t pte_maker, void *caller_data);
|
||||
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec,
|
||||
uvm_membar_t tlb_membar,
|
||||
uvm_page_table_range_pte_maker_t pte_maker,
|
||||
void *caller_data);
|
||||
|
||||
// Set all PTEs covered by the range vector to an empty PTE
|
||||
//
|
||||
@ -636,8 +650,9 @@ static NvU64 uvm_page_table_range_size(uvm_page_table_range_t *range)
|
||||
|
||||
// Get the physical address of the entry at entry_index within the range
|
||||
// (counted from range->start_index).
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree, uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree,
|
||||
uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
{
|
||||
NvU32 entry_size = uvm_mmu_pte_size(tree, range->page_size);
|
||||
uvm_gpu_phys_address_t entry = range->table->phys_alloc.addr;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -146,9 +146,15 @@ static void fake_tlb_invals_disable(void)
|
||||
g_fake_tlb_invals_tracking_enabled = false;
|
||||
}
|
||||
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be
|
||||
// verified later.
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
if (!g_fake_tlb_invals_tracking_enabled)
|
||||
return;
|
||||
@ -210,8 +216,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
|
||||
}
|
||||
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
|
||||
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
result = false;
|
||||
}
|
||||
|
||||
@ -230,7 +236,8 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
|
||||
}
|
||||
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
|
||||
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
g_last_fake_inval->base,
|
||||
g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
return false;
|
||||
}
|
||||
if (g_last_fake_inval->depth != expected_depth) {
|
||||
@ -247,15 +254,16 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
|
||||
|
||||
if (g_fake_invals_count == 0) {
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
|
||||
base, base + size);
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
|
||||
base, base + size,
|
||||
inval->base, inval->base + inval->size);
|
||||
base,
|
||||
base + size,
|
||||
inval->base,
|
||||
inval->base + inval->size);
|
||||
return false;
|
||||
}
|
||||
if (inval->depth != expected_depth) {
|
||||
@ -270,7 +278,13 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
|
||||
static bool assert_invalidate_range(NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
bool allow_inval_all,
|
||||
NvU32 range_depth,
|
||||
NvU32 all_depth,
|
||||
bool expected_membar)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
@ -488,7 +502,6 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_page_tree_t tree;
|
||||
@ -842,6 +855,7 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_RET(range2.entry_count == 256);
|
||||
TEST_CHECK_RET(range2.table->ref_count == 512);
|
||||
TEST_CHECK_RET(range1.table == range2.table);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
|
||||
TEST_CHECK_RET(range1.start_index == 256);
|
||||
@ -871,6 +885,7 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
|
||||
TEST_CHECK_RET(range64k.entry_count == 16);
|
||||
TEST_CHECK_RET(range64k.table->ref_count == 16);
|
||||
|
||||
// 64k page is first entry in a dual PDE
|
||||
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
|
||||
TEST_CHECK_RET(range64k.start_index == 16);
|
||||
@ -1030,10 +1045,13 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
|
||||
// Depth 4
|
||||
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
|
||||
|
||||
// Depth 3
|
||||
NvU64 extent_pde0 = extent_pte * (1ull << 8);
|
||||
|
||||
// Depth 2
|
||||
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
|
||||
|
||||
// Depth 1
|
||||
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
|
||||
|
||||
@ -1081,7 +1099,11 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 min_page_size,
|
||||
NvU32 max_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_push_t push;
|
||||
@ -1205,7 +1227,11 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
|
||||
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
|
||||
if (*pte != expected_pte) {
|
||||
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
|
||||
*pte, expected_pte, offset, range_vec->start, range_vec->size);
|
||||
*pte,
|
||||
expected_pte,
|
||||
offset,
|
||||
range_vec->start,
|
||||
range_vec->size);
|
||||
return false;
|
||||
}
|
||||
offset += range_vec->page_size;
|
||||
@ -1226,7 +1252,11 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
TEST_CHECK_RET(data.status == NV_OK);
|
||||
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
|
||||
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
|
||||
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
|
||||
range_vec->start,
|
||||
range_vec->size,
|
||||
range_vec->page_size,
|
||||
page_table_depth,
|
||||
membar != UVM_MEMBAR_NONE));
|
||||
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
|
||||
|
||||
fake_tlb_invals_disable();
|
||||
@ -1249,7 +1279,11 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
@ -1552,17 +1586,17 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
|
||||
memset(phys_allocs, 0, sizeof(phys_allocs));
|
||||
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits == 0x0L);
|
||||
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
|
||||
|
||||
for (j = 0; j <= 2; j++) {
|
||||
@ -1632,6 +1666,7 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
|
||||
@ -1639,31 +1674,31 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
|
||||
@ -1727,36 +1762,36 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, 3, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// NO_ATS PDE1 (depth 2)
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 2);
|
||||
hal->make_pde(pde_bits, phys_allocs, 2, NULL);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
|
||||
else
|
||||
@ -1805,32 +1840,32 @@ static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
// Cleared PDEs work as expected for big and small PDEs.
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs, uncached ATS allowed.
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, 0, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
|
||||
|
||||
// Dual PDEs, uncached.
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
hal->make_pde(pde_bits, phys_allocs, 4, NULL);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
|
||||
@ -2303,7 +2338,8 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
gpu->parent = parent_gpu;
|
||||
|
||||
// At least test_tlb_invalidates() relies on global state
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
|
||||
// time.
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// Allocate the fake TLB tracking state. Notably tests still need to enable
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2020 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -140,7 +140,10 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_pascal(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
NvU32 depth,
|
||||
uvm_page_directory_t *child_dir)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_pascal(depth);
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
@ -10155,6 +10155,30 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
|
||||
return preferred_location;
|
||||
|
||||
// Check if we should map the closest resident processor remotely on remote CPU fault
|
||||
//
|
||||
// When faulting on CPU, there's a linux process on behalf of it, which is associated
|
||||
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
|
||||
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
|
||||
// (local) fault, and we may want to migrate a page from GPU to CPU.
|
||||
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
|
||||
// VM, we might preserve residence.
|
||||
//
|
||||
// Establishing a remote fault without access counters means the memory could stay in
|
||||
// the wrong spot for a long time, which is why we prefer to avoid creating remote
|
||||
// mappings. However, when a NIC accesses memory residing on a GPU, it is worth keeping
|
||||
// that memory in place for the NIC accesses.
|
||||
//
|
||||
// The logic that's used to detect remote faulting also keeps memory in place for
|
||||
// ptrace accesses. We would prefer to control those policies separately, but the
|
||||
// NIC case takes priority.
|
||||
if (UVM_ID_IS_CPU(processor_id) &&
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
|
||||
va_block_context->mm != current->mm) {
|
||||
UVM_ASSERT(va_block_context->mm != NULL);
|
||||
return closest_resident_processor;
|
||||
}
|
||||
|
||||
// If the page is resident on a processor other than the preferred location,
|
||||
// or the faulting processor can't access the preferred location, we select
|
||||
// the faulting processor as the new residency.
|
||||
|
@ -193,7 +193,8 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block, uvm
|
||||
for ((node) = uvm_va_policy_node_iter_first((va_block), (start), (end)), \
|
||||
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)); \
|
||||
(node); \
|
||||
(node) = (next))
|
||||
(node) = (next), \
|
||||
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)))
|
||||
|
||||
// Returns the first policy in the range [start, end], if any.
|
||||
// Locking: The va_block lock must be held.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -145,7 +145,10 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_volta(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_volta(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
NvU32 depth,
|
||||
uvm_page_directory_t *child_dir)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_volta(depth);
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
|
||||
*/
|
||||
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -28,6 +28,11 @@
|
||||
|
||||
#include "nv-time.h"
|
||||
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/numa.h>
|
||||
|
||||
#include <linux/pid.h>
|
||||
|
||||
extern char *NVreg_TemporaryFilePath;
|
||||
|
||||
#define MAX_ERROR_STRING 512
|
||||
@ -2122,6 +2127,43 @@ void NV_API_CALL os_nv_cap_close_fd
|
||||
nv_cap_close_fd(fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reads the total memory and free memory of a NUMA node from the kernel.
|
||||
*/
|
||||
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
|
||||
(
|
||||
NvS32 node_id,
|
||||
NvU64 *free_memory_bytes,
|
||||
NvU64 *total_memory_bytes
|
||||
)
|
||||
{
|
||||
struct pglist_data *pgdat;
|
||||
struct zone *zone;
|
||||
NvU32 zone_id;
|
||||
|
||||
if (node_id >= MAX_NUMNODES)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
pgdat = NODE_DATA(node_id);
|
||||
|
||||
*free_memory_bytes = 0;
|
||||
*total_memory_bytes = 0;
|
||||
|
||||
for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
|
||||
{
|
||||
zone = &(pgdat->node_zones[zone_id]);
|
||||
if (!populated_zone(zone))
|
||||
continue;
|
||||
*free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
|
||||
*total_memory_bytes += (zone->present_pages * PAGE_SIZE);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
typedef struct os_numa_gpu_mem_hotplug_notifier_s
|
||||
{
|
||||
NvU64 start_pa;
|
||||
@ -2373,3 +2415,28 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Returns the result of get_task_pid(current, PIDTYPE_PID) as an opaque
 * pointer.
 *
 * NOTE(review): presumably the caller hands the value back through
 * os_put_pid_info(), which calls put_pid() on it - confirm against callers.
 */
void* NV_API_CALL os_get_pid_info(void)
{
    struct pid *task_pid = get_task_pid(current, PIDTYPE_PID);

    return task_pid;
}
|
||||
|
||||
/*
 * Passes pid_info to put_pid(). A NULL pid_info is ignored.
 */
void NV_API_CALL os_put_pid_info(void *pid_info)
{
    // Nothing to hand back.
    if (pid_info == NULL)
    {
        return;
    }

    put_pid(pid_info);
}
|
||||
|
||||
/*
 * Stores pid_vnr() of the given pid_info in *ns_pid.
 *
 * pid_info: opaque pointer, interpreted as a struct pid *.
 * ns_pid:   out; receives the value returned by pid_vnr().
 *
 * Returns NV_ERR_INVALID_ARGUMENT if either pointer is NULL,
 * NV_ERR_OBJECT_NOT_FOUND if pid_vnr() returned 0 (*ns_pid is still set
 * to 0 in that case), and NV_OK otherwise.
 */
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
    struct pid *pid = (struct pid *)pid_info;

    if ((pid == NULL) || (ns_pid == NULL))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    *ns_pid = pid_vnr(pid);

    // A result of 0 means the PID is not found in the current ns
    return (*ns_pid != 0) ? NV_OK : NV_ERR_OBJECT_NOT_FOUND;
}
|
||||
|
||||
|
@ -4740,7 +4740,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
|
||||
{
|
||||
LinkTrainingType preferredTrainingType = trainType;
|
||||
bool result;
|
||||
bool bEnableFecOnSor;
|
||||
|
||||
//
|
||||
// Validate link config against caps
|
||||
//
|
||||
@ -4832,16 +4832,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
|
||||
result = postLTAdjustment(activeLinkConfig, force);
|
||||
}
|
||||
|
||||
bEnableFecOnSor = lConfig.bEnableFEC;
|
||||
|
||||
if (main->isEDP())
|
||||
{
|
||||
DeviceImpl * nativeDev = findDeviceInList(Address());
|
||||
if (nativeDev && nativeDev->bIsPreviouslyFakedMuxDevice)
|
||||
bEnableFecOnSor = activeLinkConfig.bEnableFEC;
|
||||
}
|
||||
|
||||
if((lConfig.lanes != 0) && result && bEnableFecOnSor)
|
||||
if((lConfig.lanes != 0) && result && activeLinkConfig.bEnableFEC)
|
||||
{
|
||||
//
|
||||
// Extended latency from link-train end to FEC enable pattern
|
||||
@ -6057,7 +6048,7 @@ void ConnectorImpl::notifyLongPulseInternal(bool statusConnected)
|
||||
if (this->bReassessMaxLink)
|
||||
{
|
||||
//
|
||||
// If the highest assessed LC is not equal to
|
||||
// If the highest assessed LC is not equal to
|
||||
// max possible link config, re-assess link
|
||||
//
|
||||
NvU8 retries = 0U;
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r537_13
|
||||
#define NV_BUILD_BRANCH r537_41
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r537_13
|
||||
#define NV_PUBLIC_BRANCH r537_41
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_13-260"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33206197)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r537_41-286"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33292694)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_13-260"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33206197)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r537_41-286"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r537_13-1"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33194057)
|
||||
#define NV_BUILD_BRANCH_VERSION "r537_41-1"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33292694)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "537.17"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33194057)
|
||||
#define NV_BUILD_NAME "537.42"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33292694)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R535
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
|
||||
|
||||
#define NV_VERSION_STRING "535.104.05"
|
||||
#define NV_VERSION_STRING "535.113.01"
|
||||
|
||||
#else
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __gh100_dev_fb_h_
|
||||
#define __gh100_dev_fb_h_
|
||||
#define NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT 8 /* */
|
||||
@ -29,4 +29,25 @@
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI 0x00100A38 /* RW-4R */
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR 31:0 /* RWIVF */
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK 0x000FFFFF /* ----V */
|
||||
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
#endif // __gh100_dev_fb_h_
|
||||
|
29
src/common/inc/swref/published/hopper/gh100/dev_fbpa.h
Normal file
29
src/common/inc/swref/published/hopper/gh100/dev_fbpa.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gh100_dev_fbpa_h_
|
||||
#define __gh100_dev_fbpa_h_
|
||||
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 4 /* */
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x009025A0+(i)*4) /* RW-4A */
|
||||
#endif // __gh100_dev_fbpa_h_
|
33
src/common/inc/swref/published/hopper/gh100/dev_ltc.h
Normal file
33
src/common/inc/swref/published/hopper/gh100/dev_ltc.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gh100_dev_ltc_h_
|
||||
#define __gh100_dev_ltc_h_
|
||||
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT 0x001404f8 /* RW-4R */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIVF */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIVF */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
|
||||
#endif // __gh100_dev_ltc_h_
|
52
src/common/inc/swref/published/hopper/gh100/dev_nv_xpl.h
Normal file
52
src/common/inc/swref/published/hopper/gh100/dev_nv_xpl.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gh100_dev_nv_xpl_h_
|
||||
#define __gh100_dev_nv_xpl_h_
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF 0x00000a54 /* R--4R */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF__PRIV_LEVEL_MASK 0x00000b08 /* */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR 15:0 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR 31:16 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT 0x00000a58 /* R--4R */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT__PRIV_LEVEL_MASK 0x00000b08 /* */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR 15:0 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR 31:16 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
|
||||
#define NV_XPL_DL_ERR_RESET 0x00000a5c /* RW-4R */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT 0:0 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT 1:1 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT 16:16 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT 17:17 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#endif // __gh100_dev_nv_xpl_h_
|
@ -24,4 +24,7 @@
|
||||
#ifndef __gh100_dev_xtl_ep_pri_h__
|
||||
#define __gh100_dev_xtl_ep_pri_h__
|
||||
#define NV_EP_PCFGM 0x92FFF:0x92000 /* RW--D */
|
||||
|
||||
#define NV_XTL_EP_PRI_DED_ERROR_STATUS 0x0000043C /* RW-4R */
|
||||
#define NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS 0x000003C8 /* RW-4R */
|
||||
#endif // __gh100_dev_xtl_ep_pri_h__
|
||||
|
@ -21,3 +21,9 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 52
|
||||
#define NV_LTC_PRI_STRIDE 8192
|
||||
#define NV_LTS_PRI_STRIDE 512
|
||||
#define NV_FBPA_PRI_STRIDE 16384
|
||||
#define NV_SCAL_LITTER_NUM_FBPAS 24
|
||||
#define NV_XPL_BASE_ADDRESS 540672
|
||||
#define NV_XTL_BASE_ADDRESS 593920
|
||||
|
@ -47,5 +47,17 @@
|
||||
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT 3:3
|
||||
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT_PENDING 0x1
|
||||
#define NV_XAL_EP_SCPM_PRI_DUMMY_DATA_PATTERN_INIT 0xbadf0200
|
||||
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT 0x0010f364 /* RW-4R */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT 0x0010f37c /* RW-4R */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
#endif // __gh100_pri_nv_xal_ep_h__
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -635,4 +635,7 @@
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT 28:28 /* RWIVF */
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_SUPPORTED 0x00000001 /* RWI-V */
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_NOT_SUPPORTED 0x00000000 /* RW--V */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM 0x000007c0 /* RW-4R */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA 31:0 /* RWEVF */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT 0xdeadbaad /* RWE-V */
|
||||
#endif // __ls10_dev_nvlipt_lnk_ip_h__
|
||||
|
@ -439,6 +439,11 @@ NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
|
||||
*/
|
||||
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/******************************* NVLink link management functions *******************************/
|
||||
|
@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
|
||||
*/
|
||||
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -198,3 +198,48 @@ nvlink_lib_is_registerd_device_with_reduced_config(void)
|
||||
|
||||
return bIsReducedConfg;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of devices that have the device type deviceType
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_return_device_count_by_type
|
||||
(
|
||||
NvU32 deviceType,
|
||||
NvU32 *numDevices
|
||||
)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
nvlink_device *dev = NULL;
|
||||
NvU32 device_count = 0;
|
||||
|
||||
if (nvlink_lib_is_initialized())
|
||||
{
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Loop through device list
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
if (dev->type == deviceType)
|
||||
{
|
||||
device_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
}
|
||||
*numDevices = device_count;
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
@ -213,6 +213,7 @@
|
||||
_op(void, nvswitch_reset_persistent_link_hw_state, (nvswitch_device *device, NvU32 linkNumber), _arch)\
|
||||
_op(void, nvswitch_store_topology_information, (nvswitch_device *device, nvlink_link *link), _arch) \
|
||||
_op(void, nvswitch_init_lpwr_regs, (nvlink_link *link), _arch) \
|
||||
_op(void, nvswitch_program_l1_scratch_reg, (nvswitch_device *device, NvU32 linkNumber), _arch) \
|
||||
_op(NvlStatus, nvswitch_set_training_mode, (nvswitch_device *device), _arch) \
|
||||
_op(NvU32, nvswitch_get_sublink_width, (nvswitch_device *device, NvU32 linkNumber), _arch) \
|
||||
_op(NvBool, nvswitch_i2c_is_device_access_allowed, (nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead), _arch) \
|
||||
|
@ -583,9 +583,12 @@ typedef struct
|
||||
NvBool bDisabledRemoteEndLinkMaskCached;
|
||||
} lr10_device;
|
||||
|
||||
#define NVSWITCH_NUM_DEVICES_PER_DELTA_LR10 6
|
||||
|
||||
typedef struct {
|
||||
NvU32 switchPhysicalId;
|
||||
NvU64 linkMask;
|
||||
NvU64 accessLinkMask;
|
||||
NvU64 trunkLinkMask;
|
||||
} lr10_links_connected_to_disabled_remote_end;
|
||||
|
||||
#define NVSWITCH_GET_CHIP_DEVICE_LR10(_device) \
|
||||
@ -649,6 +652,7 @@ void nvswitch_setup_link_loopback_mode_lr10(nvswitch_device *device, NvU32
|
||||
void nvswitch_reset_persistent_link_hw_state_lr10(nvswitch_device *device, NvU32 linkNumber);
|
||||
void nvswitch_store_topology_information_lr10(nvswitch_device *device, nvlink_link *link);
|
||||
void nvswitch_init_lpwr_regs_lr10(nvlink_link *link);
|
||||
void nvswitch_program_l1_scratch_reg_lr10(nvswitch_device *device, NvU32 linkNumber);
|
||||
NvlStatus nvswitch_set_training_mode_lr10(nvswitch_device *device);
|
||||
NvBool nvswitch_i2c_is_device_access_allowed_lr10(nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead);
|
||||
NvU32 nvswitch_get_sublink_width_lr10(nvswitch_device *device,NvU32 linkNumber);
|
||||
|
@ -529,10 +529,20 @@ typedef struct
|
||||
{
|
||||
NvBool bLinkErrorsCallBackEnabled;
|
||||
NvBool bLinkStateCallBackEnabled;
|
||||
NvBool bResetAndDrainRetry;
|
||||
NvU64 lastRetrainTime;
|
||||
NvU64 lastLinkUpTime;
|
||||
} NVLINK_LINK_ERROR_REPORTING_STATE;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
|
||||
NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
|
||||
} NVLINK_LINK_ERROR_REPORTING_DATA;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NVLINK_LINK_ERROR_REPORTING_STATE state;
|
||||
NVLINK_LINK_ERROR_REPORTING_DATA data;
|
||||
} NVLINK_LINK_ERROR_REPORTING;
|
||||
|
||||
typedef struct
|
||||
@ -834,7 +844,6 @@ typedef const struct
|
||||
#define nvswitch_setup_link_loopback_mode_ls10 nvswitch_setup_link_loopback_mode_lr10
|
||||
|
||||
#define nvswitch_link_lane_reversed_ls10 nvswitch_link_lane_reversed_lr10
|
||||
#define nvswitch_request_tl_link_state_ls10 nvswitch_request_tl_link_state_lr10
|
||||
|
||||
#define nvswitch_i2c_get_port_info_ls10 nvswitch_i2c_get_port_info_lr10
|
||||
#define nvswitch_i2c_set_hw_speed_mode_ls10 nvswitch_i2c_set_hw_speed_mode_lr10
|
||||
@ -929,6 +938,7 @@ void nvswitch_corelib_clear_link_state_lr10(nvlink_link *link);
|
||||
NvlStatus nvswitch_corelib_set_dl_link_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
NvlStatus nvswitch_corelib_set_tx_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
|
||||
void nvswitch_init_lpwr_regs_ls10(nvlink_link *link);
|
||||
void nvswitch_program_l1_scratch_reg_ls10(nvswitch_device *device, NvU32 linkNumber);
|
||||
|
||||
NvlStatus nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device *device, NvU32 instance);
|
||||
|
||||
@ -986,6 +996,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
|
||||
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
|
||||
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
|
||||
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
|
||||
NvlStatus nvswitch_request_tl_link_state_ls10(nvlink_link *link, NvU32 tlLinkState, NvBool bSync);
|
||||
|
||||
//
|
||||
// SU generated functions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -46,6 +46,9 @@ typedef enum _MINION_STATUS
|
||||
MINION_ALARM_BUSY = 80,
|
||||
} MINION_STATUS;
|
||||
|
||||
#define LINKSTATUS_RESET 0x0
|
||||
#define LINKSTATUS_UNINIT 0x1
|
||||
#define LINKSTATUS_LANESHUTDOWN 0x13
|
||||
#define LINKSTATUS_EMERGENCY_SHUTDOWN 0x29
|
||||
#define LINKSTATUS_INITPHASE1 0x24
|
||||
#define LINKSTATUS_ACTIVE_PENDING 0x25
|
||||
#endif // _MINION_NVLINK_DEFINES_PUBLIC_H_
|
||||
|
@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
|
||||
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
|
||||
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
|
||||
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
|
||||
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
|
||||
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
|
||||
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
|
||||
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
|
||||
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
|
||||
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
|
||||
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
|
||||
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
|
||||
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
|
@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
|
||||
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
|
||||
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
|
||||
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
|
||||
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
|
||||
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
|
||||
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
|
||||
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
|
||||
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
|
||||
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
|
||||
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
|
||||
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x176bd707, 0x7693db62, 0xcee1dbf7, 0x0ec5a1fa,
|
||||
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x930f31b1, 0x6ce8df20, 0xa1e5e4d9, 0xc55f48a9,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
|
@ -43,40 +43,68 @@
|
||||
#include "nvswitch/lr10/dev_nvlipt_ip.h"
|
||||
#include "nvswitch/lr10/dev_nport_ip.h"
|
||||
|
||||
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 8 // This must be incremented if any entries are added to the array below
|
||||
#define NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK 12 // This must be incremented if any entries are added to the array below
|
||||
lr10_links_connected_to_disabled_remote_end nvswitchDisconnetedRemoteLinkMasks[] =
|
||||
{
|
||||
{
|
||||
0x8, // switchPhysicalId
|
||||
0x56A000500 //linkMask
|
||||
0x8, // switchPhysicalId
|
||||
0x56A000500, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x9, // switchPhysicalId
|
||||
0x509009900 //linkMask
|
||||
0x9, // switchPhysicalId
|
||||
0x509009900, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0xb, // switchPhysicalId
|
||||
0x56A000600 //linkMask
|
||||
0xa, // switchPhysicalId
|
||||
0x0, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0xc, // switchPhysicalId
|
||||
0x4A9009400 //linkMask
|
||||
0xb, // switchPhysicalId
|
||||
0x56A000600, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x18, // switchPhysicalId
|
||||
0x56A000500 //linkMask
|
||||
0xc, // switchPhysicalId
|
||||
0x4A9009400, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x19, // switchPhysicalId
|
||||
0x509009900 //linkMask
|
||||
0xd, // switchPhysicalId
|
||||
0x0, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1b, // switchPhysicalId
|
||||
0x56A000600 //linkMask
|
||||
0x18, // switchPhysicalId
|
||||
0x56A000500, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1c, // switchPhysicalId
|
||||
0x4A9009400 //linkMask
|
||||
0x19, // switchPhysicalId
|
||||
0x509009900, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1a, // switchPhysicalId
|
||||
0x0, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1b, // switchPhysicalId
|
||||
0x56A000600, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1c, // switchPhysicalId
|
||||
0x4A9009400, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
{
|
||||
0x1d, // switchPhysicalId
|
||||
0x0, // accessLinkMask
|
||||
0xFF00FF // trunkLinkMask
|
||||
},
|
||||
};
|
||||
ct_assert(sizeof(nvswitchDisconnetedRemoteLinkMasks)/sizeof(lr10_links_connected_to_disabled_remote_end) == NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK);
|
||||
@ -653,6 +681,15 @@ nvswitch_init_lpwr_regs_lr10
|
||||
tempRegVal);
|
||||
}
|
||||
|
||||
//
// LR10 entry point for programming a link's L1 scratch register.
//
// LR10 provides no implementation: the body is empty, so neither `device`
// nor `linkNumber` is used and no register is read or written.
//
void
|
||||
nvswitch_program_l1_scratch_reg_lr10
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 linkNumber
|
||||
)
|
||||
{
|
||||
// Not Implemented for LR10
|
||||
}
|
||||
|
||||
void
|
||||
nvswitch_init_buffer_ready_lr10
|
||||
@ -841,7 +878,6 @@ nvswitch_corelib_set_dl_link_mode_lr10
|
||||
|
||||
if (nvswitch_does_link_need_termination_enabled(device, link))
|
||||
{
|
||||
|
||||
if (mode == NVLINK_LINKSTATE_INITPHASE1)
|
||||
{
|
||||
status = nvswitch_link_termination_setup(device, link);
|
||||
@ -2372,6 +2408,8 @@ nvswitch_load_link_disable_settings_lr10
|
||||
NvU32 val;
|
||||
NVLINK_CONFIG_DATA_LINKENTRY *vbios_link_entry = NULL;
|
||||
NVSWITCH_BIOS_NVLINK_CONFIG *bios_config;
|
||||
NvlStatus status;
|
||||
lr10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
|
||||
|
||||
bios_config = nvswitch_get_bios_nvlink_config(device);
|
||||
if ((bios_config == NULL) || (bios_config->bit_address == 0))
|
||||
@ -2412,15 +2450,16 @@ nvswitch_load_link_disable_settings_lr10
|
||||
__FUNCTION__, link->linkNumber);
|
||||
return;
|
||||
}
|
||||
val = FLD_SET_DRF(_NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, _LINK_DISABLE,
|
||||
_DISABLED, val);
|
||||
NVSWITCH_LINK_WR32_LR10(device, link->linkNumber,
|
||||
NVLIPT_LNK, _NVLIPT_LNK, _CTRL_SYSTEM_LINK_MODE_CTRL, val);
|
||||
|
||||
// Set link to invalid and unregister from corelib
|
||||
device->link[link->linkNumber].valid = NV_FALSE;
|
||||
nvlink_lib_unregister_link(link);
|
||||
nvswitch_destroy_link(link);
|
||||
status = nvswitch_link_termination_setup(device, link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: Failed to enable termination on link #%d\n", __FUNCTION__, link->linkNumber);
|
||||
return;
|
||||
}
|
||||
// add link to disabledRemoteEndLinkMask
|
||||
chip_device->disabledRemoteEndLinkMask |= NVBIT64(link->linkNumber);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -2488,6 +2527,8 @@ nvswitch_does_link_need_termination_enabled_lr10
|
||||
NvU32 i;
|
||||
NvU32 physicalId;
|
||||
lr10_device *chip_device;
|
||||
NvU32 numNvswitches;
|
||||
NvlStatus status;
|
||||
|
||||
physicalId = nvswitch_read_physical_id(device);
|
||||
chip_device = NVSWITCH_GET_CHIP_DEVICE_LR10(device);
|
||||
@ -2510,16 +2551,30 @@ nvswitch_does_link_need_termination_enabled_lr10
|
||||
chip_device->disabledRemoteEndLinkMask = 0;
|
||||
if (nvlink_lib_is_registerd_device_with_reduced_config())
|
||||
{
|
||||
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
|
||||
{
|
||||
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
|
||||
for (i = 0; i < NUM_SWITCH_WITH_DISCONNETED_REMOTE_LINK; ++i)
|
||||
{
|
||||
chip_device->disabledRemoteEndLinkMask =
|
||||
nvswitchDisconnetedRemoteLinkMasks[i].linkMask;
|
||||
break;
|
||||
if (nvswitchDisconnetedRemoteLinkMasks[i].switchPhysicalId == physicalId)
|
||||
{
|
||||
chip_device->disabledRemoteEndLinkMask |=
|
||||
nvswitchDisconnetedRemoteLinkMasks[i].accessLinkMask;
|
||||
|
||||
status = nvlink_lib_return_device_count_by_type(NVLINK_DEVICE_TYPE_NVSWITCH, &numNvswitches);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: Failed to get nvswitch device count!\n", __FUNCTION__);
|
||||
break;
|
||||
}
|
||||
|
||||
if (numNvswitches <= NVSWITCH_NUM_DEVICES_PER_DELTA_LR10)
|
||||
{
|
||||
chip_device->disabledRemoteEndLinkMask |=
|
||||
nvswitchDisconnetedRemoteLinkMasks[i].trunkLinkMask;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
chip_device->bDisabledRemoteEndLinkMaskCached = NV_TRUE;
|
||||
}
|
||||
|
@ -5525,7 +5525,7 @@ _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
|
||||
INFOROM_NVLINK_ERROR_EVENT error_event;
|
||||
|
||||
// Only enabled link errors are deferred
|
||||
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
|
||||
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
|
||||
report.raw_pending = pending;
|
||||
report.raw_enable = pending;
|
||||
report.mask = report.raw_enable;
|
||||
@ -5565,13 +5565,13 @@ _nvswitch_emit_link_errors_minion_fatal_ls10
|
||||
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
|
||||
NvU32 bit = BIT(localLinkIdx);
|
||||
|
||||
if (!chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.bPending)
|
||||
if (!chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.bPending)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab the cached interrupt data
|
||||
regData = chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr.regData;
|
||||
regData = chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.regData;
|
||||
|
||||
// get all possible interrupting links associated with this minion
|
||||
report.raw_enable = link;
|
||||
@ -5628,7 +5628,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
|
||||
NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
|
||||
NvU32 bit = BIT(localLinkIdx);
|
||||
|
||||
if (!chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.bPending)
|
||||
if (!chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.bPending)
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -5637,7 +5637,7 @@ _nvswitch_emit_link_errors_minion_nonfatal_ls10
|
||||
regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN);
|
||||
|
||||
// Grab the cached interrupt data
|
||||
regData = chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr.regData;
|
||||
regData = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.regData;
|
||||
|
||||
// get all possible interrupting links associated with this minion
|
||||
report.raw_enable = link;
|
||||
@ -5675,7 +5675,7 @@ _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10
|
||||
NvU32 pending, bit, reg;
|
||||
|
||||
// Only enabled link errors are deferred
|
||||
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl;
|
||||
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl;
|
||||
report.raw_pending = pending;
|
||||
report.raw_enable = pending;
|
||||
report.mask = report.raw_enable;
|
||||
@ -5723,8 +5723,8 @@ _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10
|
||||
NvU32 injected;
|
||||
|
||||
// Only enabled link errors are deferred
|
||||
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1;
|
||||
injected = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected;
|
||||
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1;
|
||||
injected = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected;
|
||||
report.raw_pending = pending;
|
||||
report.raw_enable = pending;
|
||||
report.mask = report.raw_enable;
|
||||
@ -5760,7 +5760,7 @@ _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10
|
||||
INFOROM_NVLINK_ERROR_EVENT error_event;
|
||||
|
||||
// Only enabled link errors are deferred
|
||||
pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk;
|
||||
pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk;
|
||||
report.raw_pending = pending;
|
||||
report.raw_enable = pending;
|
||||
report.mask = report.raw_enable;
|
||||
@ -5805,11 +5805,11 @@ _nvswitch_clear_deferred_link_errors_ls10
|
||||
)
|
||||
{
|
||||
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
|
||||
NVLINK_LINK_ERROR_REPORTING *pLinkErrors;
|
||||
NVLINK_LINK_ERROR_REPORTING_DATA *pLinkErrorsData;
|
||||
|
||||
pLinkErrors = &chip_device->deferredLinkErrors[link];
|
||||
pLinkErrorsData = &chip_device->deferredLinkErrors[link].data;
|
||||
|
||||
nvswitch_os_memset(pLinkErrors, 0, sizeof(NVLINK_LINK_ERROR_REPORTING));
|
||||
nvswitch_os_memset(pLinkErrorsData, 0, sizeof(NVLINK_LINK_ERROR_REPORTING_DATA));
|
||||
}
|
||||
|
||||
static void
|
||||
@ -5824,36 +5824,47 @@ _nvswitch_deferred_link_state_check_ls10
|
||||
NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
|
||||
NvU32 link = pErrorReportParams->link;
|
||||
ls10_device *chip_device;
|
||||
nvlink_link *pLink;
|
||||
NvU64 linkState;
|
||||
NvU64 lastLinkUpTime;
|
||||
NvU64 lastRetrainTime;
|
||||
NvU64 current_time = nvswitch_os_get_platform_time();
|
||||
|
||||
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
|
||||
pLink = nvswitch_get_link(device, pErrorReportParams->link);
|
||||
lastLinkUpTime = chip_device->deferredLinkErrors[link].state.lastLinkUpTime;
|
||||
lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime;
|
||||
|
||||
// If is there a retry for reset_and_drain then re-create the state check for the current link
|
||||
if (chip_device->deferredLinkErrors[link].bResetAndDrainRetry == NV_TRUE)
|
||||
// Sanity Check
|
||||
NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link));
|
||||
|
||||
nvswitch_os_free(pErrorReportParams);
|
||||
pErrorReportParams = NULL;
|
||||
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE;
|
||||
|
||||
// Link came up after last retrain
|
||||
if (lastLinkUpTime >= lastRetrainTime)
|
||||
{
|
||||
if (pErrorReportParams)
|
||||
{
|
||||
nvswitch_os_free(pErrorReportParams);
|
||||
}
|
||||
|
||||
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
|
||||
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
|
||||
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((pLink == NULL) ||
|
||||
(device->hal.nvswitch_corelib_get_dl_link_mode(pLink, &linkState) != NVL_SUCCESS) ||
|
||||
((linkState != NVLINK_LINKSTATE_HS) && (linkState != NVLINK_LINKSTATE_SLEEP)))
|
||||
//
|
||||
// If the last time this link was up was before the last
|
||||
// reset_and_drain execution and not enough time has passed since the last
|
||||
// retrain then schedule another callback.
|
||||
//
|
||||
if (lastLinkUpTime < lastRetrainTime)
|
||||
{
|
||||
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
|
||||
if ((current_time - lastRetrainTime) < NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS)
|
||||
{
|
||||
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Otherwise, the link hasn't retrained within the timeout so emit the
|
||||
// deferred errors.
|
||||
//
|
||||
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
|
||||
_nvswitch_clear_deferred_link_errors_ls10(device, link);
|
||||
nvswitch_os_free(pErrorReportParams);
|
||||
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_FALSE;
|
||||
}
|
||||
|
||||
void
|
||||
@ -5868,7 +5879,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
|
||||
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
|
||||
NvlStatus status;
|
||||
|
||||
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
|
||||
if (chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -5889,7 +5900,7 @@ nvswitch_create_deferred_link_state_check_task_ls10
|
||||
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_TRUE;
|
||||
chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -5916,25 +5927,29 @@ _nvswitch_deferred_link_errors_check_ls10
|
||||
ls10_device *chip_device;
|
||||
NvU32 pending;
|
||||
|
||||
nvswitch_os_free(pErrorReportParams);
|
||||
pErrorReportParams = NULL;
|
||||
|
||||
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
|
||||
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_FALSE;
|
||||
|
||||
pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
|
||||
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending) ||
|
||||
FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending) )
|
||||
{
|
||||
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
|
||||
}
|
||||
else
|
||||
{
|
||||
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
|
||||
_nvswitch_clear_deferred_link_errors_ls10(device, link);
|
||||
}
|
||||
pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
|
||||
|
||||
if (pErrorReportParams)
|
||||
{
|
||||
nvswitch_os_free(pErrorReportParams);
|
||||
}
|
||||
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
|
||||
// A link fault was observed which means we also did the retrain and
|
||||
// scheduled a state check task. We can exit.
|
||||
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending))
|
||||
return;
|
||||
|
||||
if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending))
|
||||
return;
|
||||
|
||||
//
|
||||
// No link fault, emit the deferred errors.
|
||||
// It is assumed that this callback runs long before a link could have been
|
||||
// retrained and hit errors again.
|
||||
//
|
||||
_nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
|
||||
_nvswitch_clear_deferred_link_errors_ls10(device, link);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -5949,13 +5964,11 @@ _nvswitch_create_deferred_link_errors_task_ls10
|
||||
NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
|
||||
NvlStatus status;
|
||||
|
||||
if (chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled)
|
||||
if (chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
|
||||
|
||||
status = NVL_ERR_GENERIC;
|
||||
pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
|
||||
if(pErrorReportParams != NULL)
|
||||
@ -5972,7 +5985,7 @@ _nvswitch_create_deferred_link_errors_task_ls10
|
||||
|
||||
if (status == NVL_SUCCESS)
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_TRUE;
|
||||
chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -6026,7 +6039,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
|
||||
bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
|
||||
nvswitch_clear_flags(&unhandled, bit);
|
||||
}
|
||||
@ -6049,7 +6062,7 @@ _nvswitch_service_nvldl_nonfatal_link_ls10
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
|
||||
nvswitch_clear_flags(&unhandled, bit);
|
||||
|
||||
@ -6344,8 +6357,8 @@ _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
|
||||
bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1 |= bit;
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected |= injected;
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1 |= bit;
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected |= injected;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
|
||||
|
||||
if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
|
||||
@ -6628,8 +6641,10 @@ _nvswitch_service_nvlipt_lnk_status_ls10
|
||||
NvU32 pending, enabled, unhandled, bit;
|
||||
NvU64 mode;
|
||||
nvlink_link *link;
|
||||
link = nvswitch_get_link(device, link_id);
|
||||
ls10_device *chip_device;
|
||||
|
||||
link = nvswitch_get_link(device, link_id);
|
||||
chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
|
||||
pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);
|
||||
enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN);
|
||||
pending &= enabled;
|
||||
@ -6669,7 +6684,13 @@ _nvswitch_service_nvlipt_lnk_status_ls10
|
||||
//
|
||||
nvswitch_corelib_training_complete_ls10(link);
|
||||
nvswitch_init_buffer_ready(device, link, NV_TRUE);
|
||||
link->bRxDetected = NV_TRUE;
|
||||
link->bRxDetected = NV_TRUE;
|
||||
|
||||
//
|
||||
// Clear out any cached interrupts for the link and update the last link up timestamp
|
||||
//
|
||||
_nvswitch_clear_deferred_link_errors_ls10(device, link_id);
|
||||
chip_device->deferredLinkErrors[link_id].state.lastLinkUpTime = nvswitch_os_get_platform_time();
|
||||
}
|
||||
else if (mode == NVLINK_LINKSTATE_FAULT)
|
||||
{
|
||||
@ -6706,8 +6727,6 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
|
||||
)
|
||||
{
|
||||
ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
|
||||
nvlink_link *link_info = nvswitch_get_link(device, link);
|
||||
NvU32 lnkStateRequest, linkState;
|
||||
NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
|
||||
NvU32 pending, bit, unhandled;
|
||||
INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };
|
||||
@ -6743,27 +6762,10 @@ _nvswitch_service_nvlipt_lnk_nonfatal_ls10
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
//
|
||||
// Read back LINK_STATE_REQUESTS and TOP_LINK_STATE registers
|
||||
// If request == ACTIVE and TOP_LINK_STATE == FAULT there is a pending
|
||||
// fault on training so re-run reset_and_drain
|
||||
// Mark that the defered link error mechanism as seeing a reset_and_train re-try so
|
||||
// the deferred task needs to re-create itself instead of continuing with the linkstate
|
||||
// checks
|
||||
// based off of HW's assertion. FAILEDMINIONREQUEST always trails a DL fault. So no need to
|
||||
// do reset_and_drain here
|
||||
//
|
||||
linkState = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, NVLDL,
|
||||
_NVLDL, _TOP_LINK_STATE);
|
||||
|
||||
lnkStateRequest = NVSWITCH_LINK_RD32_LS10(device, link,
|
||||
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_REQUEST);
|
||||
|
||||
if(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, _ACTIVE, lnkStateRequest) &&
|
||||
linkState == NV_NVLDL_TOP_LINK_STATE_STATE_FAULT)
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
|
||||
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
|
||||
}
|
||||
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk |= bit;
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk |= bit;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
|
||||
nvswitch_clear_flags(&unhandled, bit);
|
||||
}
|
||||
@ -7001,9 +7003,9 @@ _nvswitch_service_nvlw_nonfatal_ls10
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
status[0] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
status[1] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
status[2] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
status[0] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
status[1] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
status[2] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
|
||||
|
||||
if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) &&
|
||||
(status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) &&
|
||||
@ -7373,6 +7375,28 @@ nvswitch_lib_service_interrupts_ls10
|
||||
// 2. Clear leaf interrupt
|
||||
// 3. Run leaf specific interrupt handler
|
||||
//
|
||||
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
|
||||
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
|
||||
if (val != 0)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
|
||||
__FUNCTION__, val);
|
||||
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
|
||||
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
|
||||
{
|
||||
if (val & NVBIT(i))
|
||||
{
|
||||
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
|
||||
__FUNCTION__, i, status);
|
||||
return_status = status;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
|
||||
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
|
||||
if (val != 0)
|
||||
@ -7397,28 +7421,6 @@ nvswitch_lib_service_interrupts_ls10
|
||||
}
|
||||
}
|
||||
|
||||
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
|
||||
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
|
||||
if (val != 0)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
|
||||
__FUNCTION__, val);
|
||||
NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
|
||||
for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
|
||||
{
|
||||
if (val & NVBIT(i))
|
||||
{
|
||||
status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
|
||||
__FUNCTION__, i, status);
|
||||
return_status = status;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
|
||||
val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
|
||||
if (val != 0)
|
||||
@ -7757,16 +7759,16 @@ nvswitch_service_nvldl_fatal_link_ls10
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
{
|
||||
dlDeferredIntrLinkMask |= bit;
|
||||
dlDeferredIntrLinkMask |= bit;
|
||||
|
||||
//
|
||||
// Since reset and drain will reset the link, including clearing
|
||||
// pending interrupts, skip the clear write below. There are cases
|
||||
// where link clocks will not be on after reset and drain so there
|
||||
// maybe PRI errors on writing to the register
|
||||
//
|
||||
bRequireResetAndDrain = NV_TRUE;
|
||||
}
|
||||
//
|
||||
// Since reset and drain will reset the link, including clearing
|
||||
// pending interrupts, skip the clear write below. There are cases
|
||||
// where link clocks will not be on after reset and drain so there
|
||||
// maybe PRI errors on writing to the register
|
||||
//
|
||||
bRequireResetAndDrain = NV_TRUE;
|
||||
}
|
||||
nvswitch_clear_flags(&unhandled, bit);
|
||||
}
|
||||
|
||||
@ -7774,41 +7776,25 @@ nvswitch_service_nvldl_fatal_link_ls10
|
||||
if (nvswitch_test_flags(pending, bit))
|
||||
{
|
||||
{
|
||||
dlDeferredIntrLinkMask |= bit;
|
||||
dlDeferredIntrLinkMask |= bit;
|
||||
|
||||
//
|
||||
// Since reset and drain will reset the link, including clearing
|
||||
// pending interrupts, skip the clear write below. There are cases
|
||||
// where link clocks will not be on after reset and drain so there
|
||||
// maybe PRI errors on writing to the register
|
||||
//
|
||||
bRequireResetAndDrain = NV_TRUE;
|
||||
}
|
||||
//
|
||||
// Since reset and drain will reset the link, including clearing
|
||||
// pending interrupts, skip the clear write below. There are cases
|
||||
// where link clocks will not be on after reset and drain so there
|
||||
// maybe PRI errors on writing to the register
|
||||
//
|
||||
bRequireResetAndDrain = NV_TRUE;
|
||||
}
|
||||
nvswitch_clear_flags(&unhandled, bit);
|
||||
}
|
||||
|
||||
if (bRequireResetAndDrain)
|
||||
{
|
||||
//
|
||||
// If there is a link state callback enabled for this link then
|
||||
// we hit a consecutive FAULT_UP error. set bResetAndDrainRetry
|
||||
// so the current callback on completion can create a new
|
||||
// callback to retry the link state check to account for the added
|
||||
// delay caused by taking a 2nd fault and having to re-train
|
||||
//
|
||||
// If there is no callback enabled then set the error mask
|
||||
// and create the link errors deferred task.
|
||||
//
|
||||
if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
chip_device->deferredLinkErrors[link].fatalIntrMask.dl = dlDeferredIntrLinkMask;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
|
||||
}
|
||||
chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl |= dlDeferredIntrLinkMask;
|
||||
device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));
|
||||
chip_device->deferredLinkErrors[link].state.lastRetrainTime = nvswitch_os_get_platform_time();
|
||||
nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
|
||||
}
|
||||
|
||||
NVSWITCH_UNHANDLED_CHECK(device, unhandled);
|
||||
@ -7916,7 +7902,7 @@ nvswitch_service_minion_link_ls10
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
|
||||
chip_device->deferredLinkErrors[link].fatalIntrMask.minionLinkIntr =
|
||||
chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr =
|
||||
minionLinkIntr;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
|
||||
break;
|
||||
@ -7928,7 +7914,7 @@ nvswitch_service_minion_link_ls10
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
|
||||
chip_device->deferredLinkErrors[link].nonFatalIntrMask.minionLinkIntr =
|
||||
chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr =
|
||||
minionLinkIntr;
|
||||
_nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
|
||||
case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:
|
||||
|
@ -98,6 +98,30 @@ _nvswitch_configure_reserved_throughput_counters
|
||||
DRF_DEF(_NVLTLC_TX_LNK, _DEBUG_TP_CNTR_CTRL_0, _ENABLE, _ENABLE));
|
||||
}
|
||||
|
||||
//
// Save a link's current L1 entry threshold register value in its warm
// scratch register.
//
// Reads NVLIPT_LNK PWRM_L1_ENTER_THRESHOLD and NVLIPT_LNK SCRATCH_WARM for
// `linkNumber`, then copies the threshold register value into SCRATCH_WARM
// only if SCRATCH_WARM still reads as NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT.
//
// device     - switch device whose link registers are accessed
// linkNumber - link whose registers are read and conditionally written
//
void
|
||||
nvswitch_program_l1_scratch_reg_ls10
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 linkNumber
|
||||
)
|
||||
{
|
||||
NvU32 scrRegVal;
|
||||
NvU32 tempRegVal;
|
||||
|
||||
// Read L1 register and store initial/VBIOS L1 Threshold Value in Scratch register
|
||||
tempRegVal = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD);
|
||||
|
||||
scrRegVal = NVSWITCH_LINK_RD32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM);
|
||||
|
||||
// Update the scratch register value only if it has not been written to before
|
||||
if (scrRegVal == NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT)
|
||||
{
|
||||
// The raw register value is stored as read; no field is extracted or masked here.
NVSWITCH_LINK_WR32_LS10(device, linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _SCRATCH_WARM, tempRegVal);
|
||||
}
|
||||
}
|
||||
|
||||
#define BUG_3797211_LS10_VBIOS_VERSION 0x9610410000
|
||||
|
||||
void
|
||||
nvswitch_init_lpwr_regs_ls10
|
||||
(
|
||||
@ -110,33 +134,56 @@ nvswitch_init_lpwr_regs_ls10
|
||||
NvU32 tempRegVal, lpEntryThreshold;
|
||||
NvU8 softwareDesired;
|
||||
NvBool bLpEnable;
|
||||
NvU64 biosVersion;
|
||||
|
||||
if (device->regkeys.enable_pm == NV_SWITCH_REGKEY_ENABLE_PM_NO)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// bios_config = nvswitch_get_bios_nvlink_config(device);
|
||||
|
||||
// IC Enter Threshold
|
||||
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
|
||||
if (nvswitch_lib_get_bios_version(device, &biosVersion) != NVL_SUCCESS)
|
||||
{
|
||||
//
|
||||
// TODO: get from bios. Refer Bug 3626523 for more info.
|
||||
//
|
||||
// The threshold is measured in 100us unit. So lpEntryThreshold = 1
|
||||
// means the threshold is set to 100us in the register.
|
||||
//
|
||||
lpEntryThreshold = 1;
|
||||
NVSWITCH_PRINT(device, WARN, "%s Get VBIOS version failed.\n",
|
||||
__FUNCTION__);
|
||||
biosVersion = 0;
|
||||
}
|
||||
|
||||
// bios_config = nvswitch_get_bios_nvlink_config(device);
|
||||
if (biosVersion >= BUG_3797211_LS10_VBIOS_VERSION)
|
||||
{
|
||||
// IC Enter Threshold
|
||||
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
|
||||
{
|
||||
//
|
||||
// Do nothing since VBIOS (version 96.10.41.00.00 and above)
|
||||
// sets the default L1 threshold.
|
||||
// Refer Bug 3797211 for more info.
|
||||
//
|
||||
}
|
||||
else
|
||||
{
|
||||
lpEntryThreshold = device->regkeys.lp_threshold;
|
||||
tempRegVal = 0;
|
||||
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
|
||||
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
lpEntryThreshold = device->regkeys.lp_threshold;
|
||||
}
|
||||
// IC Enter Threshold
|
||||
if (device->regkeys.lp_threshold == NV_SWITCH_REGKEY_SET_LP_THRESHOLD_DEFAULT)
|
||||
{
|
||||
lpEntryThreshold = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
lpEntryThreshold = device->regkeys.lp_threshold;
|
||||
}
|
||||
|
||||
tempRegVal = 0;
|
||||
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
|
||||
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
|
||||
tempRegVal = 0;
|
||||
tempRegVal = FLD_SET_DRF_NUM(_NVLIPT, _LNK_PWRM_L1_ENTER_THRESHOLD, _THRESHOLD, lpEntryThreshold, tempRegVal);
|
||||
NVSWITCH_LINK_WR32_LS10(device, linkNum, NVLIPT_LNK, _NVLIPT_LNK, _PWRM_L1_ENTER_THRESHOLD, tempRegVal);
|
||||
}
|
||||
|
||||
//LP Entry Enable
|
||||
bLpEnable = NV_TRUE;
|
||||
@ -1423,7 +1470,7 @@ nvswitch_load_link_disable_settings_ls10
|
||||
nvswitch_device *device,
|
||||
nvlink_link *link
|
||||
)
|
||||
{
|
||||
{
|
||||
NvU32 regVal;
|
||||
|
||||
// Read state from NVLIPT HW
|
||||
@ -1432,7 +1479,7 @@ nvswitch_load_link_disable_settings_ls10
|
||||
|
||||
if (FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _DISABLE, regVal))
|
||||
{
|
||||
|
||||
|
||||
// Set link to invalid and unregister from corelib
|
||||
device->link[link->linkNumber].valid = NV_FALSE;
|
||||
nvlink_lib_unregister_link(link);
|
||||
@ -1473,7 +1520,7 @@ nvswitch_execute_unilateral_link_shutdown_ls10
|
||||
// Status is explicitly ignored here since we are required to soldier-on
|
||||
// in this scenario
|
||||
//
|
||||
status = nvswitch_request_tl_link_state_lr10(link,
|
||||
status = nvswitch_request_tl_link_state_ls10(link,
|
||||
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_SHUTDOWN, NV_TRUE);
|
||||
|
||||
if (status == NVL_SUCCESS)
|
||||
@ -1492,22 +1539,22 @@ nvswitch_execute_unilateral_link_shutdown_ls10
|
||||
{
|
||||
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
|
||||
|
||||
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO,
|
||||
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
|
||||
__FUNCTION__, link_intr_subcode);
|
||||
//
|
||||
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
|
||||
// or MINION_ALARM_BUSY
|
||||
//
|
||||
retry_count--;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO,
|
||||
"%s: Retrying shutdown due to Minion DLCMD Fault subcode = 0x%x\n",
|
||||
__FUNCTION__, link_intr_subcode);
|
||||
//
|
||||
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
|
||||
// or MINION_ALARM_BUSY
|
||||
//
|
||||
retry_count--;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1542,6 +1589,12 @@ nvswitch_reset_and_train_link_ls10
|
||||
nvswitch_execute_unilateral_link_shutdown_ls10(link);
|
||||
nvswitch_corelib_clear_link_state_ls10(link);
|
||||
|
||||
//
|
||||
// When a link faults there could be a race between the driver requesting
|
||||
// reset and MINION processing Emergency Shutdown. Minion will notify if
|
||||
// such a collision happens and will deny the reset request, so try the
|
||||
// request up to 3 times
|
||||
//
|
||||
do
|
||||
{
|
||||
status = nvswitch_request_tl_link_state_ls10(link,
|
||||
@ -1565,24 +1618,24 @@ nvswitch_reset_and_train_link_ls10
|
||||
{
|
||||
link_intr_subcode = DRF_VAL(_NVLSTAT, _MN00, _LINK_INTR_SUBCODE, stat_data);
|
||||
|
||||
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
{
|
||||
if ((link_state == NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_STATUS_MINION_REQUEST_FAIL) &&
|
||||
(link_intr_subcode == MINION_ALARM_BUSY))
|
||||
{
|
||||
|
||||
status = nvswitch_request_tl_link_state_ls10(link,
|
||||
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
|
||||
status = nvswitch_request_tl_link_state_ls10(link,
|
||||
NV_NVLIPT_LNK_CTRL_LINK_STATE_REQUEST_REQUEST_RESET, NV_TRUE);
|
||||
|
||||
//
|
||||
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
|
||||
// or MINION_ALARM_BUSY
|
||||
//
|
||||
retry_count--;
|
||||
//
|
||||
// We retry the shutdown sequence 3 times when we see a MINION_REQUEST_FAIL
|
||||
// or MINION_ALARM_BUSY
|
||||
//
|
||||
retry_count--;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// failed to query minion for the link_intr_subcode so retry
|
||||
@ -1597,15 +1650,18 @@ nvswitch_reset_and_train_link_ls10
|
||||
"%s: NvLink Reset has failed for link %d\n",
|
||||
__FUNCTION__, link->linkNumber);
|
||||
|
||||
// Re-register links.
|
||||
status = nvlink_lib_register_link(device->nvlink_device, link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
nvswitch_destroy_link(link);
|
||||
return status;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
status = nvswitch_launch_ALI_link_training(device, link, NV_FALSE);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: NvLink failed to request ACTIVE for link %d\n",
|
||||
__FUNCTION__, link->linkNumber);
|
||||
return status;
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1657,6 +1713,76 @@ nvswitch_are_link_clocks_on_ls10
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
NvlStatus
|
||||
nvswitch_request_tl_link_state_ls10
|
||||
(
|
||||
nvlink_link *link,
|
||||
NvU32 tlLinkState,
|
||||
NvBool bSync
|
||||
)
|
||||
{
|
||||
nvswitch_device *device = link->dev->pDevInfo;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
NvU32 linkStatus;
|
||||
NvU32 lnkErrStatus;
|
||||
NvU32 bit;
|
||||
|
||||
if (!NVSWITCH_IS_LINK_ENG_VALID_LS10(device, NVLIPT_LNK, link->linkNumber))
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: link #%d invalid\n",
|
||||
__FUNCTION__, link->linkNumber);
|
||||
return -NVL_UNBOUND_DEVICE;
|
||||
}
|
||||
|
||||
// Wait for the TL link state register to report ready
|
||||
status = nvswitch_wait_for_tl_request_ready_lr10(link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
// Clear any pending FAILEDMINIONREQUEST status that maybe populated as it is stale now
|
||||
bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
|
||||
lnkErrStatus = NVSWITCH_LINK_RD32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
|
||||
if (nvswitch_test_flags(lnkErrStatus, bit))
|
||||
{
|
||||
NVSWITCH_LINK_WR32(device, link->linkNumber, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0,
|
||||
bit);
|
||||
}
|
||||
|
||||
|
||||
// Request state through CTRL_LINK_STATE_REQUEST
|
||||
NVSWITCH_LINK_WR32_LS10(device, link->linkNumber,
|
||||
NVLIPT_LNK, _NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST,
|
||||
DRF_NUM(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, tlLinkState));
|
||||
|
||||
if (bSync)
|
||||
{
|
||||
// Wait for the TL link state register to complete
|
||||
status = nvswitch_wait_for_tl_request_ready_lr10(link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
// Check for state requested
|
||||
linkStatus = NVSWITCH_LINK_RD32_LS10(device, link->linkNumber,
|
||||
NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_STATUS);
|
||||
|
||||
if (DRF_VAL(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, linkStatus) !=
|
||||
tlLinkState)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR,
|
||||
"%s: TL link state request to state 0x%x for link #%d did not complete!\n",
|
||||
__FUNCTION__, tlLinkState, link->linkNumber);
|
||||
return -NVL_ERR_GENERIC;
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NvBool
|
||||
nvswitch_does_link_need_termination_enabled_ls10
|
||||
(
|
||||
|
@ -1353,7 +1353,53 @@ nvswitch_init_warm_reset_ls10
|
||||
)
|
||||
{
|
||||
NVSWITCH_PRINT(device, WARN, "%s: Function not implemented\n", __FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Helper funcction to query MINION to see if DL clocks are on
|
||||
// return NV_TRUE if the clocks are on
|
||||
// NV_FALSE if the clocks are off
|
||||
static
|
||||
NvBool
|
||||
_nvswitch_are_dl_clocks_on
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 linkNumber
|
||||
)
|
||||
{
|
||||
NvU32 link_state;
|
||||
NvU32 stat_data;
|
||||
NvlStatus status = NVL_SUCCESS;
|
||||
nvlink_link * link= nvswitch_get_link(device, linkNumber);
|
||||
|
||||
if (link == NULL)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: invalid link %d\n",
|
||||
__FUNCTION__, linkNumber);
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
status = nvswitch_minion_get_dl_status(device, linkNumber,
|
||||
NV_NVLSTAT_UC01, 0, &stat_data);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
link_state = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, stat_data);
|
||||
switch(link_state)
|
||||
{
|
||||
case LINKSTATUS_RESET:
|
||||
case LINKSTATUS_UNINIT:
|
||||
return NV_FALSE;
|
||||
case LINKSTATUS_LANESHUTDOWN:
|
||||
case LINKSTATUS_ACTIVE_PENDING:
|
||||
return nvswitch_are_link_clocks_on_ls10(device, link,
|
||||
NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK));
|
||||
}
|
||||
|
||||
return NV_TRUE;
|
||||
}
|
||||
|
||||
//
|
||||
// Implement reset and drain sequence for ls10
|
||||
@ -1586,10 +1632,10 @@ nvswitch_reset_and_drain_links_ls10
|
||||
nvswitch_soe_restore_nport_state_ls10(device, link);
|
||||
|
||||
// Step 7.0 : Re-program the routing table for DBEs
|
||||
|
||||
|
||||
// Step 8.0 : Reset NVLW and NPORT interrupt state
|
||||
_nvswitch_link_reset_interrupts_ls10(device, link);
|
||||
|
||||
|
||||
// Re-register links.
|
||||
status = nvlink_lib_register_link(device->nvlink_device, link_info);
|
||||
if (status != NVL_SUCCESS)
|
||||
@ -1625,21 +1671,9 @@ nvswitch_reset_and_drain_links_ls10
|
||||
do
|
||||
{
|
||||
bKeepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
|
||||
bAreDlClocksOn = _nvswitch_are_dl_clocks_on(device, link);
|
||||
|
||||
status = nvswitch_minion_get_dl_status(device, link_info->linkNumber,
|
||||
NV_NVLSTAT_UC01, 0, &stat_data);
|
||||
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
link_state = DRF_VAL(_NVLSTAT, _UC01, _LINK_STATE, stat_data);
|
||||
|
||||
bAreDlClocksOn = (link_state != LINKSTATUS_INITPHASE1) ?
|
||||
NV_TRUE:NV_FALSE;
|
||||
|
||||
if (bAreDlClocksOn == NV_TRUE)
|
||||
if (bAreDlClocksOn)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1345,7 +1345,6 @@ nvswitch_lib_initialize_device
|
||||
NvU8 link_num;
|
||||
nvlink_link *link = NULL;
|
||||
NvBool is_blacklisted_by_os = NV_FALSE;
|
||||
NvU64 mode;
|
||||
|
||||
if (!NVSWITCH_IS_DEVICE_ACCESSIBLE(device))
|
||||
{
|
||||
@ -1508,18 +1507,19 @@ nvswitch_lib_initialize_device
|
||||
|
||||
nvswitch_reset_persistent_link_hw_state(device, link_num);
|
||||
|
||||
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
}
|
||||
else if(mode == NVLINK_LINKSTATE_FAULT)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
nvswitch_reset_and_train_link(device, link);
|
||||
}
|
||||
//
|
||||
// During Nvswitch initialization, the default L1 thresholds are programmed by the
|
||||
// BIOS from the BIOS tables. Save these L1 Threshold Values in scratch registers
|
||||
// for use when resetting the thresholds to default.
|
||||
//
|
||||
nvswitch_program_l1_scratch_reg(device, link_num);
|
||||
|
||||
//
|
||||
// WAR : Initializing the L1 threshold registers at this point as a WAR for
|
||||
// Bug 3963639 where it was discussed that the L1 threshold register should have
|
||||
// the default value for all available links and not just for active links.
|
||||
//
|
||||
nvswitch_init_lpwr_regs(link);
|
||||
}
|
||||
|
||||
retval = nvswitch_set_training_mode(device);
|
||||
@ -1623,6 +1623,10 @@ nvswitch_lib_post_init_device
|
||||
)
|
||||
{
|
||||
NvlStatus retval;
|
||||
NvlStatus status;
|
||||
NvU32 link_num;
|
||||
NvU64 mode;
|
||||
nvlink_link *link;
|
||||
|
||||
if (!NVSWITCH_IS_DEVICE_INITIALIZED(device))
|
||||
{
|
||||
@ -1634,7 +1638,7 @@ nvswitch_lib_post_init_device
|
||||
{
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
if (nvswitch_is_bios_supported(device))
|
||||
{
|
||||
retval = nvswitch_bios_get_image(device);
|
||||
@ -1670,6 +1674,41 @@ nvswitch_lib_post_init_device
|
||||
(void)nvswitch_launch_ALI(device);
|
||||
}
|
||||
|
||||
//
|
||||
// There is an edge case where a hypervisor may not send the same number
|
||||
// of resets to the switch and the GPUs, so try to re-train links in fault
|
||||
// if possible
|
||||
//
|
||||
for (link_num=0; link_num < nvswitch_get_num_links(device); link_num++)
|
||||
{
|
||||
// Sanity check
|
||||
if (!nvswitch_is_link_valid(device, link_num))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
status = nvlink_lib_get_link(device->nvlink_device, link_num, &link);
|
||||
if (status != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: Failed to get link for LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the link is in fault then re-train
|
||||
if(_nvswitch_corelib_get_dl_link_mode(link, &mode) != NVL_SUCCESS)
|
||||
{
|
||||
NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
}
|
||||
else if(mode == NVLINK_LINKSTATE_FAULT)
|
||||
{
|
||||
NVSWITCH_PRINT(device, INFO, "%s: retraining LinkId %d\n",
|
||||
__FUNCTION__, link_num);
|
||||
nvswitch_reset_and_train_link(device, link);
|
||||
}
|
||||
}
|
||||
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -4617,6 +4656,16 @@ nvswitch_init_lpwr_regs
|
||||
device->hal.nvswitch_init_lpwr_regs(link);
|
||||
}
|
||||
|
||||
void
|
||||
nvswitch_program_l1_scratch_reg
|
||||
(
|
||||
nvswitch_device *device,
|
||||
NvU32 linkNumber
|
||||
)
|
||||
{
|
||||
device->hal.nvswitch_program_l1_scratch_reg(device, linkNumber);
|
||||
}
|
||||
|
||||
NvlStatus
|
||||
nvswitch_launch_ALI
|
||||
(
|
||||
|
@ -121,7 +121,8 @@
|
||||
#define NVLINK_FLA_PRIV_ERR (137)
|
||||
#define ROBUST_CHANNEL_DLA_ERROR (138)
|
||||
#define ROBUST_CHANNEL_FAST_PATH_ERROR (139)
|
||||
#define ROBUST_CHANNEL_LAST_ERROR (ROBUST_CHANNEL_FAST_PATH_ERROR)
|
||||
#define UNRECOVERABLE_ECC_ERROR_ESCAPE (140)
|
||||
#define ROBUST_CHANNEL_LAST_ERROR (UNRECOVERABLE_ECC_ERROR_ESCAPE)
|
||||
|
||||
|
||||
// Indexed CE reference
|
||||
|
@ -95,7 +95,7 @@ NV_CRASHCAT_PACKET_FORMAT_VERSION crashcatPacketHeaderFormatVersion(NvCrashCatPa
|
||||
static NV_INLINE
|
||||
NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
|
||||
{
|
||||
NvU8 unitShift;
|
||||
NvU8 unitShift = 0;
|
||||
NV_CRASHCAT_MEM_UNIT_SIZE unitSize =
|
||||
(NV_CRASHCAT_MEM_UNIT_SIZE)DRF_VAL64(_CRASHCAT, _PACKET_HEADER, _PAYLOAD_UNIT_SIZE, hdr);
|
||||
switch (unitSize)
|
||||
@ -104,7 +104,6 @@ NvLength crashcatPacketHeaderPayloadSize(NvCrashCatPacketHeader hdr)
|
||||
case NV_CRASHCAT_MEM_UNIT_SIZE_1KB: unitShift = 10; break;
|
||||
case NV_CRASHCAT_MEM_UNIT_SIZE_4KB: unitShift = 12; break;
|
||||
case NV_CRASHCAT_MEM_UNIT_SIZE_64KB: unitShift = 16; break;
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
// Increment size, since the size in the header is size - 1 (payload of 0 size is not encodable)
|
||||
|
@ -1221,6 +1221,29 @@ typedef struct _NVEvoDevRec {
|
||||
} apiHead[NVKMS_MAX_HEADS_PER_DISP];
|
||||
} NVDevEvoRec;
|
||||
|
||||
static inline NvBool nvEvoIsConsoleActive(const NVDevEvoRec *pDevEvo)
|
||||
{
|
||||
/*
|
||||
* If (pDevEvo->modesetOwner == NULL) that means either the vbios
|
||||
* console or the NVKMS console might be active.
|
||||
*/
|
||||
if (pDevEvo->modesetOwner == NULL) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* If (pDevEvo->modesetOwner != NULL) but
|
||||
* pDevEvo->modesetOwnerChanged is TRUE, that means the modeset
|
||||
* ownership is grabbed by the external client but it hasn't
|
||||
* performed any modeset and the console is still active.
|
||||
*/
|
||||
if ((pDevEvo->modesetOwner != NULL) && pDevEvo->modesetOwnerChanged) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* The NVHwModeTimingsEvo structure stores all the values necessary to
|
||||
* perform a modeset with EVO
|
||||
|
@ -274,8 +274,12 @@ NvU32 nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr, size_t l2CacheSize)
|
||||
NvU32 i;
|
||||
NvU32 status;
|
||||
|
||||
/* If DIFR is disabled it's because we know we were or will be flipping. */
|
||||
if (pDifr->hwDisabled) {
|
||||
/*
|
||||
* If DIFR is disabled it's because we know we were or will be flipping, or
|
||||
* if console is active then the scanout surfaces will get updated by the
|
||||
* OS console driver without any knowledge of NVKMS.
|
||||
*/
|
||||
if (pDifr->hwDisabled || nvEvoIsConsoleActive(pDevEvo)) {
|
||||
return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
|
||||
}
|
||||
|
||||
|
@ -1069,23 +1069,11 @@ void nvHsConfigInitSwapGroup(
|
||||
NVHsChannelConfig *pChannelConfig = &pHsConfigOneHead->channelConfig;
|
||||
|
||||
/*
|
||||
* If (pDevEvo->modesetOwner == NULL) that means either the vbios
|
||||
* console or the NVKMS console might be active, the console
|
||||
* surface may not be set up to be the source of headSurface
|
||||
* operations, and NVKMS may be unloaded, so we can't have the
|
||||
* display rely on headSurface.
|
||||
* The console surface may not be set up to be the source of
|
||||
* headSurface operations, and NVKMS may be unloaded, so we can't
|
||||
* have the display rely on headSurface.
|
||||
*/
|
||||
if (pDevEvo->modesetOwner == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If (pDevEvo->modesetOwner != NULL) but
|
||||
* pDevEvo->modesetOwnerChanged is TRUE, that means the modeset
|
||||
* ownership is grabbed by the external client but it hasn't
|
||||
* performed any modeset and the console is still active.
|
||||
*/
|
||||
if ((pDevEvo->modesetOwner != NULL) && pDevEvo->modesetOwnerChanged) {
|
||||
if (nvEvoIsConsoleActive(pDevEvo)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -186,6 +186,7 @@ CSINFO chipsetInfo[] =
|
||||
{PCI_VENDOR_ID_INTEL, 0x7A82, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc},
|
||||
{PCI_VENDOR_ID_INTEL, 0x7A84, CS_INTEL_7A82, "Intel-AlderLake", Intel_7A82_setupFunc},
|
||||
{PCI_VENDOR_ID_INTEL, 0x1B81, CS_INTEL_1B81, "Intel-SapphireRapids", NULL},
|
||||
{PCI_VENDOR_ID_INTEL, 0x7A8A, CS_INTEL_1B81, "Intel-SapphireRapids", NULL},
|
||||
{PCI_VENDOR_ID_INTEL, 0x18DC, CS_INTEL_18DC, "Intel-IceLake", NULL},
|
||||
{PCI_VENDOR_ID_INTEL, 0x7A04, CS_INTEL_7A04, "Intel-RaptorLake", Intel_7A04_setupFunc},
|
||||
|
||||
|
@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
|
||||
OS_INTF_PCIE_REQ_ATOMICS_128BIT
|
||||
};
|
||||
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
|
||||
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
|
||||
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
|
||||
void* NV_API_CALL os_get_pid_info(void);
|
||||
void NV_API_CALL os_put_pid_info(void *pid_info);
|
||||
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
|
||||
|
||||
extern NvU32 os_page_size;
|
||||
extern NvU64 os_page_mask;
|
||||
|
@ -684,6 +684,21 @@ NV_STATUS osGetCurrentThread(OS_THREAD_HANDLE *pThreadId)
|
||||
return rmStatus;
|
||||
}
|
||||
|
||||
/* Thin wrapper: returns whatever os_get_pid_info() returns. */
void* osGetPidInfo(void)
{
    void *pOsPidInfo = os_get_pid_info();

    return pOsPidInfo;
}
|
||||
|
||||
/* Thin wrapper: hands pOsPidInfo to os_put_pid_info(). */
void osPutPidInfo(void *pOsPidInfo)
{
    os_put_pid_info(pOsPidInfo);
}
|
||||
|
||||
/*
 * Thin wrapper around os_find_ns_pid(): passes pOsPidInfo and pNsPid through
 * unchanged and returns the callee's status.
 */
NV_STATUS osFindNsPid(void *pOsPidInfo, NvU32 *pNsPid)
{
    NV_STATUS status = os_find_ns_pid(pOsPidInfo, pNsPid);

    return status;
}
|
||||
|
||||
NV_STATUS osAttachToProcess(void** ppProcessInfo, NvU32 ProcessId)
|
||||
{
|
||||
//
|
||||
@ -5371,6 +5386,28 @@ osReleaseGpuOsInfo
|
||||
nv_put_file_private(pOsInfo);
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Get free, total memory of a NUMA node by NUMA node ID from kernel.
|
||||
*
|
||||
* @param[in] numaId NUMA node ID.
|
||||
* @param[out] free_memory_bytes free memory in bytes.
|
||||
* @param[out] total_memory_bytes total memory in bytes.
|
||||
*
|
||||
*/
|
||||
void
|
||||
osGetNumaMemoryUsage
|
||||
(
|
||||
NvS32 numaId,
|
||||
NvU64 *free_memory_bytes,
|
||||
NvU64 *total_memory_bytes
|
||||
)
|
||||
{
|
||||
NV_STATUS status = os_get_numa_node_memory_usage(numaId,
|
||||
free_memory_bytes,
|
||||
total_memory_bytes);
|
||||
NV_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Add GPU memory as a NUMA node.
|
||||
*
|
||||
|
@ -140,6 +140,7 @@ struct RmClient {
|
||||
NvU32 Flags;
|
||||
NvU32 ClientDebuggerState;
|
||||
void *pOSInfo;
|
||||
void *pOsPidInfo;
|
||||
char name[100];
|
||||
CLI_SYSTEM_EVENT_INFO CliSysEventInfo;
|
||||
PSECURITY_TOKEN pSecurityToken;
|
||||
|
@ -492,6 +492,17 @@ static void __nvoc_init_funcTable_OBJGPU_1(OBJGPU *pThis) {
|
||||
pThis->__gpuWriteFunctionConfigRegEx__ = &gpuWriteFunctionConfigRegEx_GM107;
|
||||
}
|
||||
|
||||
// Hal function -- gpuReadVgpuConfigReg
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->__gpuReadVgpuConfigReg__ = &gpuReadVgpuConfigReg_GH100;
|
||||
}
|
||||
// default
|
||||
else
|
||||
{
|
||||
pThis->__gpuReadVgpuConfigReg__ = &gpuReadVgpuConfigReg_46f6a7;
|
||||
}
|
||||
|
||||
// Hal function -- gpuGetIdInfo
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
|
@ -877,6 +877,7 @@ struct OBJGPU {
|
||||
NV_STATUS (*__gpuReadFunctionConfigReg__)(struct OBJGPU *, NvU32, NvU32, NvU32 *);
|
||||
NV_STATUS (*__gpuWriteFunctionConfigReg__)(struct OBJGPU *, NvU32, NvU32, NvU32);
|
||||
NV_STATUS (*__gpuWriteFunctionConfigRegEx__)(struct OBJGPU *, NvU32, NvU32, NvU32, THREAD_STATE_NODE *);
|
||||
NV_STATUS (*__gpuReadVgpuConfigReg__)(struct OBJGPU *, NvU32, NvU32 *);
|
||||
void (*__gpuGetIdInfo__)(struct OBJGPU *);
|
||||
void (*__gpuHandleSanityCheckRegReadError__)(struct OBJGPU *, NvU32, NvU32);
|
||||
void (*__gpuHandleSecFault__)(struct OBJGPU *);
|
||||
@ -1007,6 +1008,9 @@ struct OBJGPU {
|
||||
NvU32 subdeviceInstance;
|
||||
NvS32 numaNodeId;
|
||||
_GPU_UUID gpuUuid;
|
||||
NvU32 gpuPhysicalId;
|
||||
NvU32 gpuTerminatedLinkMask;
|
||||
NvBool gpuLinkTerminationEnabled;
|
||||
NvBool gspRmInitialized;
|
||||
_GPU_PCIE_PEER_CLIQUE pciePeerClique;
|
||||
NvU32 i2cPortForExtdev;
|
||||
@ -1427,6 +1431,8 @@ NV_STATUS __nvoc_objCreate_OBJGPU(OBJGPU**, Dynamic*, NvU32,
|
||||
#define gpuWriteFunctionConfigReg_HAL(pGpu, function, reg, data) gpuWriteFunctionConfigReg_DISPATCH(pGpu, function, reg, data)
|
||||
#define gpuWriteFunctionConfigRegEx(pGpu, function, reg, data, pThreadState) gpuWriteFunctionConfigRegEx_DISPATCH(pGpu, function, reg, data, pThreadState)
|
||||
#define gpuWriteFunctionConfigRegEx_HAL(pGpu, function, reg, data, pThreadState) gpuWriteFunctionConfigRegEx_DISPATCH(pGpu, function, reg, data, pThreadState)
|
||||
#define gpuReadVgpuConfigReg(pGpu, index, data) gpuReadVgpuConfigReg_DISPATCH(pGpu, index, data)
|
||||
#define gpuReadVgpuConfigReg_HAL(pGpu, index, data) gpuReadVgpuConfigReg_DISPATCH(pGpu, index, data)
|
||||
#define gpuGetIdInfo(pGpu) gpuGetIdInfo_DISPATCH(pGpu)
|
||||
#define gpuGetIdInfo_HAL(pGpu) gpuGetIdInfo_DISPATCH(pGpu)
|
||||
#define gpuHandleSanityCheckRegReadError(pGpu, addr, value) gpuHandleSanityCheckRegReadError_DISPATCH(pGpu, addr, value)
|
||||
@ -2422,6 +2428,19 @@ static inline void gpuUpdateUserSharedData(struct OBJGPU *pGpu) {
|
||||
|
||||
#define gpuUpdateUserSharedData_HAL(pGpu) gpuUpdateUserSharedData(pGpu)
|
||||
|
||||
void gpuGetTerminatedLinkMask_GA100(struct OBJGPU *pGpu, NvU32 arg0);
|
||||
|
||||
|
||||
#ifdef __nvoc_gpu_h_disabled
|
||||
// Stub used when __nvoc_gpu_h_disabled is defined: only raises the
// "OBJGPU was disabled!" precompiled assert; the arguments are not used.
static inline void gpuGetTerminatedLinkMask(struct OBJGPU *pGpu, NvU32 arg0)
{
    NV_ASSERT_FAILED_PRECOMP("OBJGPU was disabled!");
}
|
||||
#else //__nvoc_gpu_h_disabled
|
||||
#define gpuGetTerminatedLinkMask(pGpu, arg0) gpuGetTerminatedLinkMask_GA100(pGpu, arg0)
|
||||
#endif //__nvoc_gpu_h_disabled
|
||||
|
||||
#define gpuGetTerminatedLinkMask_HAL(pGpu, arg0) gpuGetTerminatedLinkMask(pGpu, arg0)
|
||||
|
||||
NV_STATUS gpuJtVersionSanityCheck_TU102(struct OBJGPU *pGpu);
|
||||
|
||||
|
||||
@ -2970,6 +2989,16 @@ static inline NV_STATUS gpuWriteFunctionConfigRegEx_DISPATCH(struct OBJGPU *pGpu
|
||||
return pGpu->__gpuWriteFunctionConfigRegEx__(pGpu, function, reg, data, pThreadState);
|
||||
}
|
||||
|
||||
NV_STATUS gpuReadVgpuConfigReg_GH100(struct OBJGPU *pGpu, NvU32 index, NvU32 *data);
|
||||
|
||||
// Stub implementation of gpuReadVgpuConfigReg: ignores its arguments and
// always returns NV_ERR_NOT_SUPPORTED.
static inline NV_STATUS gpuReadVgpuConfigReg_46f6a7(struct OBJGPU *pGpu, NvU32 index, NvU32 *data)
{
    return NV_ERR_NOT_SUPPORTED;
}
|
||||
|
||||
// Dispatcher: calls the implementation stored in pGpu->__gpuReadVgpuConfigReg__
// and returns its status.
static inline NV_STATUS gpuReadVgpuConfigReg_DISPATCH(struct OBJGPU *pGpu, NvU32 index, NvU32 *data)
{
    NV_STATUS status = pGpu->__gpuReadVgpuConfigReg__(pGpu, index, data);

    return status;
}
|
||||
|
||||
void gpuGetIdInfo_GM107(struct OBJGPU *pGpu);
|
||||
|
||||
void gpuGetIdInfo_GH100(struct OBJGPU *pGpu);
|
||||
|
@ -137,10 +137,14 @@ void __nvoc_dtor_KernelFsp(KernelFsp *pThis) {
|
||||
void __nvoc_init_dataField_KernelFsp(KernelFsp *pThis, RmHalspecOwner *pRmhalspecowner) {
|
||||
ChipHal *chipHal = &pRmhalspecowner->chipHal;
|
||||
const unsigned long chipHal_HalVarIdx = (unsigned long)chipHal->__nvoc_HalVarIdx;
|
||||
RmVariantHal *rmVariantHal = &pRmhalspecowner->rmVariantHal;
|
||||
const unsigned long rmVariantHal_HalVarIdx = (unsigned long)rmVariantHal->__nvoc_HalVarIdx;
|
||||
PORT_UNREFERENCED_VARIABLE(pThis);
|
||||
PORT_UNREFERENCED_VARIABLE(pRmhalspecowner);
|
||||
PORT_UNREFERENCED_VARIABLE(chipHal);
|
||||
PORT_UNREFERENCED_VARIABLE(chipHal_HalVarIdx);
|
||||
PORT_UNREFERENCED_VARIABLE(rmVariantHal);
|
||||
PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx);
|
||||
|
||||
// NVOC Property Hal field -- PDB_PROP_KFSP_IS_MISSING
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
@ -152,6 +156,12 @@ void __nvoc_init_dataField_KernelFsp(KernelFsp *pThis, RmHalspecOwner *pRmhalspe
|
||||
{
|
||||
pThis->setProperty(pThis, PDB_PROP_KFSP_IS_MISSING, ((NvBool)(0 == 0)));
|
||||
}
|
||||
|
||||
// NVOC Property Hal field -- PDB_PROP_KFSP_DISABLE_FRTS_SYSMEM
|
||||
if (( ((rmVariantHal_HalVarIdx >> 5) == 0UL) && ((1UL << (rmVariantHal_HalVarIdx & 0x1f)) & 0x00000002UL) )) /* RmVariantHal: PF_KERNEL_ONLY */
|
||||
{
|
||||
pThis->setProperty(pThis, PDB_PROP_KFSP_DISABLE_FRTS_SYSMEM, ((NvBool)(0 == 0)));
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS __nvoc_ctor_OBJENGSTATE(OBJENGSTATE* );
|
||||
@ -171,10 +181,14 @@ __nvoc_ctor_KernelFsp_exit:
|
||||
static void __nvoc_init_funcTable_KernelFsp_1(KernelFsp *pThis, RmHalspecOwner *pRmhalspecowner) {
|
||||
ChipHal *chipHal = &pRmhalspecowner->chipHal;
|
||||
const unsigned long chipHal_HalVarIdx = (unsigned long)chipHal->__nvoc_HalVarIdx;
|
||||
RmVariantHal *rmVariantHal = &pRmhalspecowner->rmVariantHal;
|
||||
const unsigned long rmVariantHal_HalVarIdx = (unsigned long)rmVariantHal->__nvoc_HalVarIdx;
|
||||
PORT_UNREFERENCED_VARIABLE(pThis);
|
||||
PORT_UNREFERENCED_VARIABLE(pRmhalspecowner);
|
||||
PORT_UNREFERENCED_VARIABLE(chipHal);
|
||||
PORT_UNREFERENCED_VARIABLE(chipHal_HalVarIdx);
|
||||
PORT_UNREFERENCED_VARIABLE(rmVariantHal);
|
||||
PORT_UNREFERENCED_VARIABLE(rmVariantHal_HalVarIdx);
|
||||
|
||||
pThis->__kfspConstructEngine__ = &kfspConstructEngine_IMPL;
|
||||
|
||||
|
@ -425,6 +425,28 @@ static void __nvoc_init_funcTable_KernelMemorySystem_1(KernelMemorySystem *pThis
|
||||
pThis->__kmemsysRemoveAllAtsPeers__ = &kmemsysRemoveAllAtsPeers_GV100;
|
||||
}
|
||||
|
||||
// Hal function -- kmemsysCheckEccCounts
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->__kmemsysCheckEccCounts__ = &kmemsysCheckEccCounts_GH100;
|
||||
}
|
||||
// default
|
||||
else
|
||||
{
|
||||
pThis->__kmemsysCheckEccCounts__ = &kmemsysCheckEccCounts_b3696a;
|
||||
}
|
||||
|
||||
// Hal function -- kmemsysClearEccCounts
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->__kmemsysClearEccCounts__ = &kmemsysClearEccCounts_GH100;
|
||||
}
|
||||
// default
|
||||
else
|
||||
{
|
||||
pThis->__kmemsysClearEccCounts__ = &kmemsysClearEccCounts_56cd7a;
|
||||
}
|
||||
|
||||
pThis->__nvoc_base_OBJENGSTATE.__engstateConstructEngine__ = &__nvoc_thunk_KernelMemorySystem_engstateConstructEngine;
|
||||
|
||||
pThis->__nvoc_base_OBJENGSTATE.__engstateStateInitLocked__ = &__nvoc_thunk_KernelMemorySystem_engstateStateInitLocked;
|
||||
|
@ -222,6 +222,8 @@ struct KernelMemorySystem {
|
||||
void (*__kmemsysNumaRemoveAllMemory__)(OBJGPU *, struct KernelMemorySystem *);
|
||||
NV_STATUS (*__kmemsysSetupAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
|
||||
void (*__kmemsysRemoveAllAtsPeers__)(OBJGPU *, struct KernelMemorySystem *);
|
||||
void (*__kmemsysCheckEccCounts__)(OBJGPU *, struct KernelMemorySystem *);
|
||||
NV_STATUS (*__kmemsysClearEccCounts__)(OBJGPU *, struct KernelMemorySystem *);
|
||||
NV_STATUS (*__kmemsysStateLoad__)(POBJGPU, struct KernelMemorySystem *, NvU32);
|
||||
NV_STATUS (*__kmemsysStateUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
|
||||
NV_STATUS (*__kmemsysStatePostUnload__)(POBJGPU, struct KernelMemorySystem *, NvU32);
|
||||
@ -323,6 +325,10 @@ NV_STATUS __nvoc_objCreate_KernelMemorySystem(KernelMemorySystem**, Dynamic*, Nv
|
||||
#define kmemsysSetupAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysSetupAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysRemoveAllAtsPeers(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysRemoveAllAtsPeers_HAL(pGpu, pKernelMemorySystem) kmemsysRemoveAllAtsPeers_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysCheckEccCounts(pGpu, pKernelMemorySystem) kmemsysCheckEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem) kmemsysCheckEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysClearEccCounts(pGpu, pKernelMemorySystem) kmemsysClearEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysClearEccCounts_HAL(pGpu, pKernelMemorySystem) kmemsysClearEccCounts_DISPATCH(pGpu, pKernelMemorySystem)
|
||||
#define kmemsysStateLoad(pGpu, pEngstate, arg0) kmemsysStateLoad_DISPATCH(pGpu, pEngstate, arg0)
|
||||
#define kmemsysStateUnload(pGpu, pEngstate, arg0) kmemsysStateUnload_DISPATCH(pGpu, pEngstate, arg0)
|
||||
#define kmemsysStatePostUnload(pGpu, pEngstate, arg0) kmemsysStatePostUnload_DISPATCH(pGpu, pEngstate, arg0)
|
||||
@ -733,6 +739,26 @@ static inline void kmemsysRemoveAllAtsPeers_DISPATCH(OBJGPU *pGpu, struct Kernel
|
||||
pKernelMemorySystem->__kmemsysRemoveAllAtsPeers__(pGpu, pKernelMemorySystem);
|
||||
}
|
||||
|
||||
void kmemsysCheckEccCounts_GH100(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
|
||||
|
||||
// No-op variant of kmemsysCheckEccCounts: ignores both arguments and returns.
static inline void kmemsysCheckEccCounts_b3696a(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem)
{
    (void)pGpu;
    (void)pKernelMemorySystem;
}
|
||||
|
||||
// Forwards the call to whichever implementation is installed in the object's
// __kmemsysCheckEccCounts__ function pointer.
static inline void kmemsysCheckEccCounts_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem)
{
    void (*pfnCheckEccCounts)(OBJGPU *, struct KernelMemorySystem *) =
        pKernelMemorySystem->__kmemsysCheckEccCounts__;

    pfnCheckEccCounts(pGpu, pKernelMemorySystem);
}
|
||||
|
||||
NV_STATUS kmemsysClearEccCounts_GH100(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem);
|
||||
|
||||
// No-op variant of kmemsysClearEccCounts: ignores both arguments and
// always reports NV_OK.
static inline NV_STATUS kmemsysClearEccCounts_56cd7a(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem)
{
    (void)pGpu;
    (void)pKernelMemorySystem;

    return NV_OK;
}
|
||||
|
||||
// Forwards the call to the implementation installed in the object's
// __kmemsysClearEccCounts__ function pointer and returns its status.
static inline NV_STATUS kmemsysClearEccCounts_DISPATCH(OBJGPU *pGpu, struct KernelMemorySystem *pKernelMemorySystem)
{
    NV_STATUS status;

    status = pKernelMemorySystem->__kmemsysClearEccCounts__(pGpu, pKernelMemorySystem);
    return status;
}
|
||||
|
||||
// Forwards the call to the implementation installed in the object's
// __kmemsysStateLoad__ function pointer and returns its status.
static inline NV_STATUS kmemsysStateLoad_DISPATCH(POBJGPU pGpu, struct KernelMemorySystem *pEngstate, NvU32 arg0)
{
    NV_STATUS status;

    status = pEngstate->__kmemsysStateLoad__(pGpu, pEngstate, arg0);
    return status;
}
|
||||
|
@ -221,6 +221,16 @@ static void __nvoc_init_funcTable_KernelCE_1(KernelCE *pThis, RmHalspecOwner *pR
|
||||
|
||||
pThis->__kceServiceNotificationInterrupt__ = &kceServiceNotificationInterrupt_IMPL;
|
||||
|
||||
// Hal function -- kceGetP2PCes
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x10000000UL) )) /* ChipHal: GH100 */
|
||||
{
|
||||
pThis->__kceGetP2PCes__ = &kceGetP2PCes_GH100;
|
||||
}
|
||||
else
|
||||
{
|
||||
pThis->__kceGetP2PCes__ = &kceGetP2PCes_GV100;
|
||||
}
|
||||
|
||||
// Hal function -- kceGetNvlinkAutoConfigCeValues
|
||||
if (( ((chipHal_HalVarIdx >> 5) == 1UL) && ((1UL << (chipHal_HalVarIdx & 0x1f)) & 0x000003e0UL) )) /* ChipHal: TU102 | TU104 | TU106 | TU116 | TU117 */
|
||||
{
|
||||
|
@ -113,6 +113,7 @@ struct KernelCE {
|
||||
NV_STATUS (*__kceStateUnload__)(OBJGPU *, struct KernelCE *, NvU32);
|
||||
void (*__kceRegisterIntrService__)(OBJGPU *, struct KernelCE *, IntrServiceRecord *);
|
||||
NV_STATUS (*__kceServiceNotificationInterrupt__)(OBJGPU *, struct KernelCE *, IntrServiceServiceNotificationInterruptArguments *);
|
||||
NV_STATUS (*__kceGetP2PCes__)(struct KernelCE *, OBJGPU *, NvU32, NvU32 *);
|
||||
NV_STATUS (*__kceGetNvlinkAutoConfigCeValues__)(OBJGPU *, struct KernelCE *, NvU32 *, NvU32 *, NvU32 *);
|
||||
NvBool (*__kceGetNvlinkMaxTopoForTable__)(OBJGPU *, struct KernelCE *, struct NVLINK_TOPOLOGY_PARAMS *, void *, NvU32, NvU32 *);
|
||||
NvBool (*__kceIsCurrentMaxTopology__)(OBJGPU *, struct KernelCE *, struct NVLINK_TOPOLOGY_PARAMS *, NvU32 *, NvU32 *);
|
||||
@ -190,6 +191,8 @@ NV_STATUS __nvoc_objCreate_KernelCE(KernelCE**, Dynamic*, NvU32);
|
||||
#define kceStateUnload_HAL(pGpu, pKCe, flags) kceStateUnload_DISPATCH(pGpu, pKCe, flags)
|
||||
#define kceRegisterIntrService(arg0, arg1, arg2) kceRegisterIntrService_DISPATCH(arg0, arg1, arg2)
|
||||
#define kceServiceNotificationInterrupt(arg0, arg1, arg2) kceServiceNotificationInterrupt_DISPATCH(arg0, arg1, arg2)
|
||||
#define kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_DISPATCH(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
|
||||
#define kceGetP2PCes_HAL(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_DISPATCH(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
|
||||
#define kceGetNvlinkAutoConfigCeValues(pGpu, pKCe, arg0, arg1, arg2) kceGetNvlinkAutoConfigCeValues_DISPATCH(pGpu, pKCe, arg0, arg1, arg2)
|
||||
#define kceGetNvlinkAutoConfigCeValues_HAL(pGpu, pKCe, arg0, arg1, arg2) kceGetNvlinkAutoConfigCeValues_DISPATCH(pGpu, pKCe, arg0, arg1, arg2)
|
||||
#define kceGetNvlinkMaxTopoForTable(pGpu, pKCe, arg0, arg1, arg2, arg3) kceGetNvlinkMaxTopoForTable_DISPATCH(pGpu, pKCe, arg0, arg1, arg2, arg3)
|
||||
@ -305,20 +308,6 @@ static inline NvBool kceIsCeNvlinkP2P(OBJGPU *pGpu, struct KernelCE *pKCe) {
|
||||
|
||||
#define kceIsCeNvlinkP2P_HAL(pGpu, pKCe) kceIsCeNvlinkP2P(pGpu, pKCe)
|
||||
|
||||
NV_STATUS kceGetP2PCes_GV100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
|
||||
|
||||
|
||||
#ifdef __nvoc_kernel_ce_h_disabled
|
||||
// Stub used when __nvoc_kernel_ce_h_disabled is defined: raises the
// precompiled assert and reports NV_ERR_NOT_SUPPORTED without touching
// any of its arguments.
static inline NV_STATUS kceGetP2PCes(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask)
{
    const NV_STATUS disabledStatus = NV_ERR_NOT_SUPPORTED;

    NV_ASSERT_FAILED_PRECOMP("KernelCE was disabled!");
    return disabledStatus;
}
|
||||
#else //__nvoc_kernel_ce_h_disabled
|
||||
#define kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes_GV100(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
|
||||
#endif //__nvoc_kernel_ce_h_disabled
|
||||
|
||||
#define kceGetP2PCes_HAL(arg0, pGpu, gpuMask, nvlinkP2PCeMask) kceGetP2PCes(arg0, pGpu, gpuMask, nvlinkP2PCeMask)
|
||||
|
||||
void kceGetSysmemRWLCEs_GV100(struct KernelCE *arg0, NvU32 *rd, NvU32 *wr);
|
||||
|
||||
|
||||
@ -397,6 +386,14 @@ static inline NV_STATUS kceServiceNotificationInterrupt_DISPATCH(OBJGPU *arg0, s
|
||||
return arg1->__kceServiceNotificationInterrupt__(arg0, arg1, arg2);
|
||||
}
|
||||
|
||||
NV_STATUS kceGetP2PCes_GV100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
|
||||
|
||||
NV_STATUS kceGetP2PCes_GH100(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask);
|
||||
|
||||
// Forwards the call to the implementation installed in the KernelCE
// object's __kceGetP2PCes__ function pointer and returns its status.
static inline NV_STATUS kceGetP2PCes_DISPATCH(struct KernelCE *arg0, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask)
{
    NV_STATUS status;

    status = arg0->__kceGetP2PCes__(arg0, pGpu, gpuMask, nvlinkP2PCeMask);
    return status;
}
|
||||
|
||||
NV_STATUS kceGetNvlinkAutoConfigCeValues_TU102(OBJGPU *pGpu, struct KernelCE *pKCe, NvU32 *arg0, NvU32 *arg1, NvU32 *arg2);
|
||||
|
||||
NV_STATUS kceGetNvlinkAutoConfigCeValues_GA100(OBJGPU *pGpu, struct KernelCE *pKCe, NvU32 *arg0, NvU32 *arg1, NvU32 *arg2);
|
||||
|
@ -105,10 +105,6 @@ static NV_STATUS __nvoc_thunk_MemoryMulticastFabric_memControl(struct Memory *pM
|
||||
return memorymulticastfabricControl((struct MemoryMulticastFabric *)(((unsigned char *)pMemoryMulticastFabric) - __nvoc_rtti_MemoryMulticastFabric_Memory.offset), pCallContext, pParams);
|
||||
}
|
||||
|
||||
// Thunk: rewinds the RmResource pointer by the RTTI-recorded offset to
// recover the MemoryMulticastFabric object, then calls
// memorymulticastfabricControl_Prologue on it.
static NV_STATUS __nvoc_thunk_MemoryMulticastFabric_rmresControl_Prologue(struct RmResource *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams)
{
    unsigned char *pBaseBytes = (unsigned char *)pMemoryMulticastFabric;
    struct MemoryMulticastFabric *pDerived =
        (struct MemoryMulticastFabric *)(pBaseBytes - __nvoc_rtti_MemoryMulticastFabric_RmResource.offset);

    return memorymulticastfabricControl_Prologue(pDerived, pCallContext, pParams);
}
|
||||
|
||||
static NvBool __nvoc_thunk_MemoryMulticastFabric_memIsGpuMapAllowed(struct Memory *pMemoryMulticastFabric, struct OBJGPU *pGpu) {
|
||||
return memorymulticastfabricIsGpuMapAllowed((struct MemoryMulticastFabric *)(((unsigned char *)pMemoryMulticastFabric) - __nvoc_rtti_MemoryMulticastFabric_Memory.offset), pGpu);
|
||||
}
|
||||
@ -137,6 +133,10 @@ static void __nvoc_thunk_RsResource_memorymulticastfabricAddAdditionalDependants
|
||||
resAddAdditionalDependants(pClient, (struct RsResource *)(((unsigned char *)pResource) + __nvoc_rtti_MemoryMulticastFabric_RsResource.offset), pReference);
|
||||
}
|
||||
|
||||
// Thunk: advances the MemoryMulticastFabric pointer by the RTTI-recorded
// offset to reach its RmResource part, then calls rmresControl_Prologue
// on it.
static NV_STATUS __nvoc_thunk_RmResource_memorymulticastfabricControl_Prologue(struct MemoryMulticastFabric *pResource, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams)
{
    unsigned char *pDerivedBytes = (unsigned char *)pResource;
    struct RmResource *pBase =
        (struct RmResource *)(pDerivedBytes + __nvoc_rtti_MemoryMulticastFabric_RmResource.offset);

    return rmresControl_Prologue(pBase, pCallContext, pParams);
}
|
||||
|
||||
// Thunk: advances the MemoryMulticastFabric pointer by the RTTI-recorded
// offset to reach its RsResource part, then calls resUnmapFrom on it.
static NV_STATUS __nvoc_thunk_RsResource_memorymulticastfabricUnmapFrom(struct MemoryMulticastFabric *pResource, RS_RES_UNMAP_FROM_PARAMS *pParams)
{
    unsigned char *pDerivedBytes = (unsigned char *)pResource;
    struct RsResource *pBase =
        (struct RsResource *)(pDerivedBytes + __nvoc_rtti_MemoryMulticastFabric_RsResource.offset);

    return resUnmapFrom(pBase, pParams);
}
|
||||
@ -324,8 +324,6 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
|
||||
|
||||
pThis->__memorymulticastfabricControl__ = &memorymulticastfabricControl_IMPL;
|
||||
|
||||
pThis->__memorymulticastfabricControl_Prologue__ = &memorymulticastfabricControl_Prologue_IMPL;
|
||||
|
||||
pThis->__memorymulticastfabricIsGpuMapAllowed__ = &memorymulticastfabricIsGpuMapAllowed_IMPL;
|
||||
|
||||
pThis->__memorymulticastfabricGetMapAddrSpace__ = &memorymulticastfabricGetMapAddrSpace_IMPL;
|
||||
@ -356,8 +354,6 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
|
||||
|
||||
pThis->__nvoc_base_Memory.__memControl__ = &__nvoc_thunk_MemoryMulticastFabric_memControl;
|
||||
|
||||
pThis->__nvoc_base_Memory.__nvoc_base_RmResource.__rmresControl_Prologue__ = &__nvoc_thunk_MemoryMulticastFabric_rmresControl_Prologue;
|
||||
|
||||
pThis->__nvoc_base_Memory.__memIsGpuMapAllowed__ = &__nvoc_thunk_MemoryMulticastFabric_memIsGpuMapAllowed;
|
||||
|
||||
pThis->__nvoc_base_Memory.__memGetMapAddrSpace__ = &__nvoc_thunk_MemoryMulticastFabric_memGetMapAddrSpace;
|
||||
@ -372,6 +368,8 @@ static void __nvoc_init_funcTable_MemoryMulticastFabric_1(MemoryMulticastFabric
|
||||
|
||||
pThis->__memorymulticastfabricAddAdditionalDependants__ = &__nvoc_thunk_RsResource_memorymulticastfabricAddAdditionalDependants;
|
||||
|
||||
pThis->__memorymulticastfabricControl_Prologue__ = &__nvoc_thunk_RmResource_memorymulticastfabricControl_Prologue;
|
||||
|
||||
pThis->__memorymulticastfabricUnmapFrom__ = &__nvoc_thunk_RsResource_memorymulticastfabricUnmapFrom;
|
||||
|
||||
pThis->__memorymulticastfabricControl_Epilogue__ = &__nvoc_thunk_RmResource_memorymulticastfabricControl_Epilogue;
|
||||
|
@ -158,7 +158,6 @@ struct MemoryMulticastFabric {
|
||||
NV_STATUS (*__memorymulticastfabricCopyConstruct__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_ALLOC_PARAMS_INTERNAL *);
|
||||
NV_STATUS (*__memorymulticastfabricIsReady__)(struct MemoryMulticastFabric *, NvBool);
|
||||
NV_STATUS (*__memorymulticastfabricControl__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
|
||||
NV_STATUS (*__memorymulticastfabricControl_Prologue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
|
||||
NvBool (*__memorymulticastfabricIsGpuMapAllowed__)(struct MemoryMulticastFabric *, struct OBJGPU *);
|
||||
NV_STATUS (*__memorymulticastfabricGetMapAddrSpace__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, NvU32, NV_ADDRESS_SPACE *);
|
||||
NV_STATUS (*__memorymulticastfabricCtrlGetInfo__)(struct MemoryMulticastFabric *, NV00FD_CTRL_GET_INFO_PARAMS *);
|
||||
@ -171,6 +170,7 @@ struct MemoryMulticastFabric {
|
||||
NV_STATUS (*__memorymulticastfabricMapTo__)(struct MemoryMulticastFabric *, RS_RES_MAP_TO_PARAMS *);
|
||||
NvU32 (*__memorymulticastfabricGetRefCount__)(struct MemoryMulticastFabric *);
|
||||
void (*__memorymulticastfabricAddAdditionalDependants__)(struct RsClient *, struct MemoryMulticastFabric *, RsResourceRef *);
|
||||
NV_STATUS (*__memorymulticastfabricControl_Prologue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
|
||||
NV_STATUS (*__memorymulticastfabricUnmapFrom__)(struct MemoryMulticastFabric *, RS_RES_UNMAP_FROM_PARAMS *);
|
||||
void (*__memorymulticastfabricControl_Epilogue__)(struct MemoryMulticastFabric *, CALL_CONTEXT *, struct RS_RES_CONTROL_PARAMS_INTERNAL *);
|
||||
NV_STATUS (*__memorymulticastfabricControlLookup__)(struct MemoryMulticastFabric *, struct RS_RES_CONTROL_PARAMS_INTERNAL *, const struct NVOC_EXPORTED_METHOD_DEF **);
|
||||
@ -220,7 +220,6 @@ NV_STATUS __nvoc_objCreate_MemoryMulticastFabric(MemoryMulticastFabric**, Dynami
|
||||
#define memorymulticastfabricCopyConstruct(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricCopyConstruct_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
|
||||
#define memorymulticastfabricIsReady(pMemoryMulticastFabric, bCopyConstructorContext) memorymulticastfabricIsReady_DISPATCH(pMemoryMulticastFabric, bCopyConstructorContext)
|
||||
#define memorymulticastfabricControl(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricControl_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
|
||||
#define memorymulticastfabricControl_Prologue(pMemoryMulticastFabric, pCallContext, pParams) memorymulticastfabricControl_Prologue_DISPATCH(pMemoryMulticastFabric, pCallContext, pParams)
|
||||
#define memorymulticastfabricIsGpuMapAllowed(pMemoryMulticastFabric, pGpu) memorymulticastfabricIsGpuMapAllowed_DISPATCH(pMemoryMulticastFabric, pGpu)
|
||||
#define memorymulticastfabricGetMapAddrSpace(pMemoryMulticastFabric, pCallContext, mapFlags, pAddrSpace) memorymulticastfabricGetMapAddrSpace_DISPATCH(pMemoryMulticastFabric, pCallContext, mapFlags, pAddrSpace)
|
||||
#define memorymulticastfabricCtrlGetInfo(pMemoryMulticastFabric, pParams) memorymulticastfabricCtrlGetInfo_DISPATCH(pMemoryMulticastFabric, pParams)
|
||||
@ -233,6 +232,7 @@ NV_STATUS __nvoc_objCreate_MemoryMulticastFabric(MemoryMulticastFabric**, Dynami
|
||||
#define memorymulticastfabricMapTo(pResource, pParams) memorymulticastfabricMapTo_DISPATCH(pResource, pParams)
|
||||
#define memorymulticastfabricGetRefCount(pResource) memorymulticastfabricGetRefCount_DISPATCH(pResource)
|
||||
#define memorymulticastfabricAddAdditionalDependants(pClient, pResource, pReference) memorymulticastfabricAddAdditionalDependants_DISPATCH(pClient, pResource, pReference)
|
||||
#define memorymulticastfabricControl_Prologue(pResource, pCallContext, pParams) memorymulticastfabricControl_Prologue_DISPATCH(pResource, pCallContext, pParams)
|
||||
#define memorymulticastfabricUnmapFrom(pResource, pParams) memorymulticastfabricUnmapFrom_DISPATCH(pResource, pParams)
|
||||
#define memorymulticastfabricControl_Epilogue(pResource, pCallContext, pParams) memorymulticastfabricControl_Epilogue_DISPATCH(pResource, pCallContext, pParams)
|
||||
#define memorymulticastfabricControlLookup(pResource, pParams, ppEntry) memorymulticastfabricControlLookup_DISPATCH(pResource, pParams, ppEntry)
|
||||
@ -271,12 +271,6 @@ static inline NV_STATUS memorymulticastfabricControl_DISPATCH(struct MemoryMulti
|
||||
return pMemoryMulticastFabric->__memorymulticastfabricControl__(pMemoryMulticastFabric, pCallContext, pParams);
|
||||
}
|
||||
|
||||
NV_STATUS memorymulticastfabricControl_Prologue_IMPL(struct MemoryMulticastFabric *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams);
|
||||
|
||||
// Forwards the call to the implementation installed in the object's
// __memorymulticastfabricControl_Prologue__ function pointer and returns
// its status.
static inline NV_STATUS memorymulticastfabricControl_Prologue_DISPATCH(struct MemoryMulticastFabric *pMemoryMulticastFabric, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams)
{
    NV_STATUS status;

    status = pMemoryMulticastFabric->__memorymulticastfabricControl_Prologue__(pMemoryMulticastFabric, pCallContext, pParams);
    return status;
}
|
||||
|
||||
NvBool memorymulticastfabricIsGpuMapAllowed_IMPL(struct MemoryMulticastFabric *pMemoryMulticastFabric, struct OBJGPU *pGpu);
|
||||
|
||||
static inline NvBool memorymulticastfabricIsGpuMapAllowed_DISPATCH(struct MemoryMulticastFabric *pMemoryMulticastFabric, struct OBJGPU *pGpu) {
|
||||
@ -339,6 +333,10 @@ static inline void memorymulticastfabricAddAdditionalDependants_DISPATCH(struct
|
||||
pResource->__memorymulticastfabricAddAdditionalDependants__(pClient, pResource, pReference);
|
||||
}
|
||||
|
||||
// Forwards the call to the implementation installed in the object's
// __memorymulticastfabricControl_Prologue__ function pointer and returns
// its status.
static inline NV_STATUS memorymulticastfabricControl_Prologue_DISPATCH(struct MemoryMulticastFabric *pResource, CALL_CONTEXT *pCallContext, struct RS_RES_CONTROL_PARAMS_INTERNAL *pParams)
{
    NV_STATUS status;

    status = pResource->__memorymulticastfabricControl_Prologue__(pResource, pCallContext, pParams);
    return status;
}
|
||||
|
||||
// Forwards the call to the implementation installed in the object's
// __memorymulticastfabricUnmapFrom__ function pointer and returns its status.
static inline NV_STATUS memorymulticastfabricUnmapFrom_DISPATCH(struct MemoryMulticastFabric *pResource, RS_RES_UNMAP_FROM_PARAMS *pParams)
{
    NV_STATUS status;

    status = pResource->__memorymulticastfabricUnmapFrom__(pResource, pParams);
    return status;
}
|
||||
|
@ -1007,6 +1007,10 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x27B0, 0x16fa, 0x103c, "NVIDIA RTX 4000 SFF Ada Generation" },
|
||||
{ 0x27B0, 0x16fa, 0x10de, "NVIDIA RTX 4000 SFF Ada Generation" },
|
||||
{ 0x27B0, 0x16fa, 0x17aa, "NVIDIA RTX 4000 SFF Ada Generation" },
|
||||
{ 0x27B1, 0x180c, 0x1028, "NVIDIA RTX 4500 Ada Generation" },
|
||||
{ 0x27B1, 0x180c, 0x103c, "NVIDIA RTX 4500 Ada Generation" },
|
||||
{ 0x27B1, 0x180c, 0x10de, "NVIDIA RTX 4500 Ada Generation" },
|
||||
{ 0x27B1, 0x180c, 0x17aa, "NVIDIA RTX 4500 Ada Generation" },
|
||||
{ 0x27B2, 0x181b, 0x1028, "NVIDIA RTX 4000 Ada Generation" },
|
||||
{ 0x27B2, 0x181b, 0x103c, "NVIDIA RTX 4000 Ada Generation" },
|
||||
{ 0x27B2, 0x181b, 0x10de, "NVIDIA RTX 4000 Ada Generation" },
|
||||
|
@ -880,6 +880,10 @@ NV_STATUS osReserveCpuAddressSpaceUpperBound(void **ppSectionHandle,
|
||||
NvU64 maxSectionSize);
|
||||
void osReleaseCpuAddressSpaceUpperBound(void *pSectionHandle);
|
||||
|
||||
void* osGetPidInfo(void);
|
||||
void osPutPidInfo(void *pOsPidInfo);
|
||||
NV_STATUS osFindNsPid(void *pOsPidInfo, NvU32 *pNsPid);
|
||||
|
||||
// OS Tegra IPC functions
|
||||
NV_STATUS osTegraDceRegisterIpcClient(NvU32 interfaceType, void *usrCtx,
|
||||
NvU32 *clientId);
|
||||
@ -1249,6 +1253,8 @@ static NV_INLINE NV_STATUS isrWrapper(NvBool testIntr, OBJGPU *pGpu)
|
||||
#define OS_PCIE_CAP_MASK_REQ_ATOMICS_64 NVBIT(1)
|
||||
#define OS_PCIE_CAP_MASK_REQ_ATOMICS_128 NVBIT(2)
|
||||
|
||||
void osGetNumaMemoryUsage(NvS32 numaId, NvU64 *free_memory_bytes, NvU64 *total_memory_bytes);
|
||||
|
||||
NV_STATUS osNumaAddGpuMemory(OS_GPU_INFO *pOsGpuInfo, NvU64 offset,
|
||||
NvU64 size, NvU32 *pNumaNodeId);
|
||||
void osNumaRemoveGpuMemory(OS_GPU_INFO *pOsGpuInfo, NvU64 offset,
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "published/hopper/gh100/dev_pmc.h"
|
||||
#include "published/hopper/gh100/dev_xtl_ep_pcfg_gpu.h"
|
||||
#include "published/hopper/gh100/pri_nv_xal_ep.h"
|
||||
#include "published/hopper/gh100/dev_xtl_ep_pri.h"
|
||||
|
||||
#include "ctrl/ctrl2080/ctrl2080mc.h"
|
||||
|
||||
@ -77,6 +78,28 @@ gpuReadBusConfigReg_GH100
|
||||
return gpuReadBusConfigCycle(pGpu, index, pData);
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Read the non-private registers on vGPU through mirror space
|
||||
*
|
||||
* @param[in] pGpu GPU object pointer
|
||||
* @param[in] index Register offset in PCIe config space
|
||||
* @param[out] pData Value of the register
|
||||
*
|
||||
* @returns NV_OK on success
|
||||
*/
|
||||
NV_STATUS
|
||||
gpuReadVgpuConfigReg_GH100
|
||||
(
|
||||
OBJGPU *pGpu,
|
||||
NvU32 index,
|
||||
NvU32 *pData
|
||||
)
|
||||
{
|
||||
*pData = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_EP_PCFGM) + index);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief Get GPU ID based on PCIE config reads.
|
||||
* Also determine other properties of the PCIE capabilities.
|
||||
|
@ -45,6 +45,7 @@
|
||||
#define NV_CE_NUM_FBPCE 4
|
||||
#define NV_CE_NUM_PCES_NO_LINK_CASE 12
|
||||
#define NV_CE_MAX_PCE_PER_GRCE 2
|
||||
#define NV_CE_HSHUBNVL_ID_0 2
|
||||
|
||||
/*
|
||||
* Table for setting the PCE2LCE mapping for WAR configs that cannot be implemented
|
||||
@ -931,3 +932,181 @@ kceGetMappings_GH100
|
||||
NV_PRINTF(LEVEL_INFO, "status = %d, statusC2C = %d\n", status, statusC2C);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
//
// Select the LCE(s) to use for NVLink P2P on GH100.
//
// @param[in]  pKCe             KernelCE object pointer
// @param[in]  pGpu             GPU object pointer
// @param[in]  gpuMask          GPU mask the request applies to
// @param[out] nvlinkP2PCeMask  Receives the resulting LCE mask
//
// @returns the status of kceGetP2PCes_GV100(); the function currently
//          delegates to it unconditionally (see the note in the body).
//
NV_STATUS kceGetP2PCes_GH100(KernelCE *pKCe, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask)
{
    //
    // Currently Bug 4103154 requires an updated algorithm described below
    // to assign the proper LCE. Cases without MODS enabled can default back
    // to the previous version.
    //
    // NOTE: this return is unconditional, so everything after it is
    // unreachable in this build. It is kept so the updated algorithm stays
    // next to the code path that would use it.
    //
    return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask);

    NvU32         gpuCount      = gpumgrGetSubDeviceCount(gpuMask);
    NvU32         minP2PLce     = (NV_CE_EVEN_ASYNC_LCE_MASK | NV_CE_ODD_ASYNC_LCE_MASK) & NV_CE_MAX_LCE_MASK;
    NvU32         i;
    KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);

    if (pKernelNvlink == NULL)
    {
        return NV_WARN_NOTHING_TO_DO;
    }

    if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
    {
        return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask);
    }

    LOWESTBITIDX_32(minP2PLce);
    *nvlinkP2PCeMask = 0;

    if (gpuCount == 1)
    {
        *nvlinkP2PCeMask |= NVBIT(minP2PLce);
        for (i = minP2PLce; i < gpuGetNumCEs(pGpu); i++)
        {
            *nvlinkP2PCeMask |= NVBIT(i);
        }
    }
    else if (gpuCount > 2)
    {
        // More than two GPUs is an invalid request: log it and fail.
        NV_PRINTF(LEVEL_INFO, "GPU %d invalid request for gpuCount %d\n", gpuGetInstance(pGpu), gpuCount);
        return NV_ERR_INVALID_STATE;
    }
    else
    {
        OBJGPU   *pRemoteGpu   = NULL;
        KernelCE *pKCeLoop     = NULL;
        NvU32     peerLinkMask = 0;
        NvU32     gpuInstance  = 0;
        NvU32     phyLinkId, status, targetPceMask, numPces;

        //
        // The LCE returned should be the LCE which has the most PCEs mapped
        // on the given HSHUB. This HSHUB should be determined by
        // tracking where the majority of links are connected.
        //
        NvU32     linksPerHshub[NV_CE_MAX_HSHUBS] = {0};
        NvU32     maxLinksConnectedHshub = 0;
        NvU32     maxConnectedHshubId = NV_CE_MAX_HSHUBS;
        NvU32     lceAssignedMask = 0;
        KernelCE *maxLcePerHshub[NV_CE_MAX_HSHUBS] = {0};

        NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params;

        // pKernelNvlink is known to be non-NULL here (checked above).

        // Get the remote GPU
        while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
        {
            if (pRemoteGpu != pGpu)
                break;
        }

        NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE);
        gpuInstance = gpuGetInstance(pRemoteGpu);

        peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu);

        portMemSet(&params, 0, sizeof(params));
        params.linkMask = peerLinkMask;

        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS,
                                     (void *)&params, sizeof(params));
        NV_ASSERT_OK_OR_RETURN(status);

        FOR_EACH_INDEX_IN_MASK(32, phyLinkId, peerLinkMask)
        {
            NvU32 hshubId = params.hshubIds[phyLinkId];
            linksPerHshub[hshubId]++;

            if (linksPerHshub[hshubId] > maxLinksConnectedHshub)
            {
                maxLinksConnectedHshub = linksPerHshub[hshubId];
                maxConnectedHshubId = hshubId;
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;

        //
        // With an empty peer link mask no HSHUB was selected and
        // maxConnectedHshubId still holds its NV_CE_MAX_HSHUBS sentinel,
        // which is one past the end of maxLcePerHshub[]. There is nothing
        // to assign in that case, so leave *nvlinkP2PCeMask at 0.
        //
        if (maxConnectedHshubId >= NV_CE_MAX_HSHUBS)
        {
            return NV_OK;
        }

        //
        // Iterate through all Async LCEs to track which HSHUB should
        // be using which LCE. This is decided based on the majority. If
        // there is a tie, then LCE with the lower index is preferred.
        //
        KCE_ITER_ALL_BEGIN(pGpu, pKCeLoop, minP2PLce)
            NvU32     localMaxPcePerHshub = 0;
            KernelCE *localMaxLcePerHshub = NULL;
            NvU32     localMaxHshub = NV_CE_MAX_HSHUBS;

            // Skip stubbed LCEs
            if (pKCeLoop->bStubbed)
            {
                continue;
            }

            // LCE is already assigned to this peer
            if ((pKCeLoop->nvlinkPeerMask & NVBIT(gpuInstance)) != 0)
            {
                maxLcePerHshub[maxConnectedHshubId] = pKCeLoop;
                break;
            }
            // LCE is already assigned to another peer
            else if (pKCeLoop->nvlinkPeerMask != 0)
            {
                continue;
            }

            // Named distinctly so it does not shadow the HSHUB-ID params above.
            NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS pceMaskParams = {0};

            pceMaskParams.ceEngineType = NV2080_ENGINE_TYPE_COPY(pKCeLoop->publicID);
            status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                         NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
                                         (void *)&pceMaskParams, sizeof(pceMaskParams));
            NV_ASSERT_OK_OR_RETURN(status);

            //
            // An LCE may be utilized across several HSHUBs. Loop through all HSHUBs
            // in order to decide which HSHUB holds the majority of this specific LCE.
            // To help with this, create a mask of PCEs only on the HSHUB which the peer
            // is most connected to by shifting the HSHUB PCE mask
            //
            for (i = NV_CE_HSHUBNVL_ID_0; i < NV_CE_MAX_HSHUBS; i++)
            {
                targetPceMask = pceMaskParams.pceMask & ((NVBIT(NV_CE_PCE_PER_HSHUB) - 1) << ((i - NV_CE_HSHUBNVL_ID_0) * NV_CE_PCE_PER_HSHUB));
                numPces = nvPopCount32(targetPceMask);
                if (numPces > localMaxPcePerHshub && !(lceAssignedMask & NVBIT(pKCeLoop->publicID)))
                {
                    localMaxPcePerHshub = numPces;
                    localMaxLcePerHshub = pKCeLoop;
                    localMaxHshub = i;
                }
            }

            // localMaxHshub is only moved off its sentinel together with
            // localMaxLcePerHshub being set, so the pointer is valid here.
            if (localMaxHshub < NV_CE_MAX_HSHUBS)
            {
                maxLcePerHshub[localMaxHshub] = localMaxLcePerHshub;
                lceAssignedMask |= NVBIT(localMaxLcePerHshub->publicID);
            }

        KCE_ITER_END

        if (maxLcePerHshub[maxConnectedHshubId] != NULL)
        {
            NV_PRINTF(LEVEL_INFO,
                      "GPU %d Assigning Peer %d to LCE %d\n",
                      gpuGetInstance(pGpu), gpuInstance,
                      maxLcePerHshub[maxConnectedHshubId]->publicID);

            maxLcePerHshub[maxConnectedHshubId]->nvlinkPeerMask = NVBIT(gpuInstance);
            *nvlinkP2PCeMask = NVBIT(maxLcePerHshub[maxConnectedHshubId]->publicID);
        }
    }

    return NV_OK;
}
|
||||
|
@ -51,6 +51,9 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
|
||||
ConfidentialCompute *pConfCompute,
|
||||
ENGDESCRIPTOR engDesc)
|
||||
{
|
||||
OBJSYS *pSys = SYS_GET_INSTANCE();
|
||||
NvU32 data = 0;
|
||||
NvBool bForceEnableCC = 0;
|
||||
pConfCompute->pSpdm = NULL;
|
||||
portMemSet(&pConfCompute->ccStaticInfo, 0, sizeof(pConfCompute->ccStaticInfo));
|
||||
pConfCompute->gspProxyRegkeys = 0;
|
||||
@ -74,6 +77,20 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
|
||||
|
||||
if (pConfCompute->getProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENABLED))
|
||||
{
|
||||
bForceEnableCC = (osReadRegistryDword(pGpu, NV_REG_STR_RM_CONFIDENTIAL_COMPUTE, &data) == NV_OK) &&
|
||||
FLD_TEST_DRF(_REG_STR, _RM_CONFIDENTIAL_COMPUTE, _ENABLED, _YES, data);
|
||||
|
||||
if (!RMCFG_FEATURE_PLATFORM_GSP && !RMCFG_FEATURE_PLATFORM_MODS && !bForceEnableCC)
|
||||
{
|
||||
if (!(sysGetStaticConfig(pSys)->bOsCCEnabled))
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "CPU does not support confidential compute.\n");
|
||||
NV_ASSERT(0);
|
||||
pConfCompute->setProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENABLED, NV_FALSE);
|
||||
return NV_ERR_INVALID_OPERATION;
|
||||
}
|
||||
}
|
||||
|
||||
NV_CHECK_OR_RETURN(LEVEL_ERROR, confComputeIsGpuCcCapable_HAL(pGpu, pConfCompute), NV_ERR_INVALID_OPERATION);
|
||||
|
||||
if (pGpu->getProperty(pGpu, PDB_PROP_GPU_APM_FEATURE_CAPABLE))
|
||||
@ -92,7 +109,7 @@ confComputeConstructEngine_IMPL(OBJGPU *pGpu,
|
||||
}
|
||||
else
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "GPU does not support confidential compute");
|
||||
NV_PRINTF(LEVEL_ERROR, "GPU does not support confidential compute.\n");
|
||||
NV_ASSERT(0);
|
||||
return NV_ERR_INVALID_OPERATION;
|
||||
}
|
||||
|
@ -50,6 +50,8 @@
|
||||
|
||||
#include "kernel/gpu/intr/engine_idx.h"
|
||||
|
||||
#include "gpu/external_device/external_device.h"
|
||||
|
||||
#include "ctrl/ctrl2080.h"
|
||||
|
||||
#include "class/cl5070.h"
|
||||
@ -490,6 +492,8 @@ void
|
||||
kdispStateDestroy_IMPL(OBJGPU *pGpu,
|
||||
KernelDisplay *pKernelDisplay)
|
||||
{
|
||||
extdevDestroy(pGpu);
|
||||
|
||||
if (pKernelDisplay->pInst != NULL)
|
||||
{
|
||||
instmemStateDestroy(pGpu, pKernelDisplay->pInst);
|
||||
|
@ -264,7 +264,7 @@ void *kcrashcatEngineMapBufferDescriptor_IMPL
|
||||
memdescMap(pMemDesc, 0, memdescGetSize(pMemDesc), NV_TRUE,
|
||||
NV_PROTECT_READABLE, &pBuf, &pPriv),
|
||||
{
|
||||
if (pBufDesc->pEngPriv == NULL)
|
||||
if (!pBufDesc->bRegistered)
|
||||
memdescDestroy(pMemDesc);
|
||||
return NULL;
|
||||
});
|
||||
|
@ -4941,12 +4941,19 @@ gpuReadBusConfigCycle_IMPL
|
||||
NvU8 device = gpuGetDevice(pGpu);
|
||||
NvU8 function = 0;
|
||||
|
||||
if (pGpu->hPci == NULL)
|
||||
if (IS_PASSTHRU(pGpu))
|
||||
{
|
||||
pGpu->hPci = osPciInitHandle(domain, bus, device, function, NULL, NULL);
|
||||
gpuReadVgpuConfigReg_HAL(pGpu, index, pData);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pGpu->hPci == NULL)
|
||||
{
|
||||
pGpu->hPci = osPciInitHandle(domain, bus, device, function, NULL, NULL);
|
||||
}
|
||||
|
||||
*pData = osPciReadDword(pGpu->hPci, index);
|
||||
*pData = osPciReadDword(pGpu->hPci, index);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -647,6 +647,20 @@ _gpuiIsPidSavedAlready
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
static NV_STATUS
|
||||
_gpuConvertPid
|
||||
(
|
||||
RmClient *pClient,
|
||||
NvU32 *pNsPid
|
||||
)
|
||||
{
|
||||
if (pClient->pOsPidInfo != NULL)
|
||||
return osFindNsPid(pClient->pOsPidInfo, pNsPid);
|
||||
|
||||
*pNsPid = pClient->ProcID;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
//
|
||||
// Searches through clients to find processes with clients that have
|
||||
// allocated an ElementType of class, defined by elementID. The return values
|
||||
@ -673,6 +687,7 @@ gpuGetProcWithObject_IMPL
|
||||
RmClient *pClient;
|
||||
RsClient *pRsClient;
|
||||
RsResourceRef *pResourceRef;
|
||||
NV_STATUS status;
|
||||
|
||||
NV_ASSERT_OR_RETURN((pPidArray != NULL), NV_ERR_INVALID_ARGUMENT);
|
||||
NV_ASSERT_OR_RETURN((pPidArrayCount != NULL), NV_ERR_INVALID_ARGUMENT);
|
||||
@ -782,8 +797,15 @@ gpuGetProcWithObject_IMPL
|
||||
}
|
||||
if (elementInClient)
|
||||
{
|
||||
pPidArray[pidcount] = pClient->ProcID;
|
||||
pidcount++;
|
||||
status = _gpuConvertPid(pClient, &pPidArray[pidcount]);
|
||||
if (status == NV_OK)
|
||||
{
|
||||
pidcount++;
|
||||
}
|
||||
else if (status != NV_ERR_OBJECT_NOT_FOUND)
|
||||
{
|
||||
return status;
|
||||
}
|
||||
|
||||
if (pidcount == NV2080_CTRL_GPU_GET_PIDS_MAX_COUNT)
|
||||
{
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "gpu/conf_compute/conf_compute.h"
|
||||
#include "gpu/fsp/kern_fsp.h"
|
||||
#include "gpu/gsp/kernel_gsp.h"
|
||||
#include "gpu/mem_sys/kern_mem_sys.h"
|
||||
#include "gsp/gspifpub.h"
|
||||
#include "vgpu/rpc.h"
|
||||
|
||||
@ -523,6 +524,7 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
{
|
||||
KernelFalcon *pKernelFalcon = staticCast(pKernelGsp, KernelFalcon);
|
||||
KernelFsp *pKernelFsp = GPU_GET_KERNEL_FSP(pGpu);
|
||||
KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
// Only for GSP client builds
|
||||
@ -532,8 +534,16 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
// Clear ECC errors before attempting to load GSP
|
||||
status = kmemsysClearEccCounts_HAL(pGpu, pKernelMemorySystem);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Issue clearing ECC counts! Status:0x%x\n", status);
|
||||
}
|
||||
|
||||
// Setup the descriptors that GSP-FMC needs to boot GSP-RM
|
||||
NV_ASSERT_OK_OR_RETURN(kgspSetupGspFmcArgs_HAL(pGpu, pKernelGsp, pGspFw));
|
||||
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
|
||||
kgspSetupGspFmcArgs_HAL(pGpu, pKernelGsp, pGspFw), exit);
|
||||
|
||||
kgspSetupLibosInitArgs(pGpu, pKernelGsp);
|
||||
|
||||
@ -562,7 +572,8 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
{
|
||||
NV_PRINTF(LEVEL_NOTICE, "Starting to boot GSP via FSP.\n");
|
||||
pKernelFsp->setProperty(pKernelFsp, PDB_PROP_KFSP_GSP_MODE_GSPRM, NV_TRUE);
|
||||
NV_ASSERT_OK_OR_RETURN(kfspSendBootCommands_HAL(pGpu, pKernelFsp));
|
||||
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
|
||||
kfspSendBootCommands_HAL(pGpu, pKernelFsp), exit);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -585,7 +596,7 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
kfspDumpDebugState_HAL(pGpu, pKernelFsp);
|
||||
}
|
||||
|
||||
return status;
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
@ -606,7 +617,7 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
kflcnRegRead_HAL(pGpu, pKernelFalcon, NV_PFALCON_FALCON_MAILBOX0));
|
||||
NV_PRINTF(LEVEL_ERROR, "NV_PGSP_FALCON_MAILBOX1 = 0x%x\n",
|
||||
kflcnRegRead_HAL(pGpu, pKernelFalcon, NV_PFALCON_FALCON_MAILBOX1));
|
||||
return status;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
// Start polling for libos logs now that lockdown is released
|
||||
@ -640,6 +651,11 @@ kgspBootstrapRiscvOSEarly_GH100
|
||||
NV_PRINTF(LEVEL_INFO, "GSP FW RM ready.\n");
|
||||
|
||||
exit:
|
||||
// If GSP fails to boot, check if there's any DED error.
|
||||
if (status != NV_OK)
|
||||
{
|
||||
kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem);
|
||||
}
|
||||
NV_ASSERT(status == NV_OK);
|
||||
|
||||
return status;
|
||||
|
@ -799,7 +799,7 @@ kgspHealthCheck_TU102
|
||||
objDelete(pReport);
|
||||
}
|
||||
|
||||
return bHealthy;
|
||||
goto exit_health_check;
|
||||
}
|
||||
|
||||
NvU32 mb0 = GPU_REG_RD32(pGpu, NV_PGSP_MAILBOX(0));
|
||||
@ -845,6 +845,12 @@ kgspHealthCheck_TU102
|
||||
"********************************************************************************\n");
|
||||
}
|
||||
|
||||
exit_health_check:
|
||||
if (!bHealthy)
|
||||
{
|
||||
KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
|
||||
kmemsysCheckEccCounts_HAL(pGpu, pKernelMemorySystem);
|
||||
}
|
||||
return bHealthy;
|
||||
}
|
||||
|
||||
|
@ -2438,7 +2438,8 @@ kgspInitRm_IMPL
|
||||
if (pKernelGsp->pLogElf == NULL)
|
||||
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, nvlogRegisterFlushCb(kgspNvlogFlushCb, pKernelGsp), done);
|
||||
|
||||
// Wait for GFW_BOOT OK status
|
||||
// Reset thread state timeout and wait for GFW_BOOT OK status
|
||||
threadStateResetTimeout(pGpu);
|
||||
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, kgspWaitForGfwBootOk_HAL(pGpu, pKernelGsp), done);
|
||||
|
||||
// Fail early if WPR2 is up
|
||||
|
@ -494,19 +494,6 @@ memmgrStateLoad_IMPL
|
||||
memmgrScrubInit_HAL(pGpu, pMemoryManager);
|
||||
}
|
||||
|
||||
if (osNumaOnliningEnabled(pGpu->pOsGpuInfo))
|
||||
{
|
||||
//
|
||||
// NUMA onlined memory size should not exceed memory size assigned to PMA.
|
||||
// TODO : Currently in selfhosted and P9+GV100 systems numaOnlined size is less
|
||||
// than PMA Memory Size. Ideally both of them should be identical. Bug 4051320.
|
||||
//
|
||||
NvU64 pmaTotalMemorySize;
|
||||
NvU64 numaOnlineSize = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu)->numaOnlineSize;
|
||||
pmaGetTotalMemory(&GPU_GET_HEAP(pGpu)->pmaObject, &pmaTotalMemorySize);
|
||||
NV_ASSERT_OR_RETURN(pmaTotalMemorySize >= numaOnlineSize, NV_ERR_INVALID_STATE);
|
||||
}
|
||||
|
||||
// Dump FB regions
|
||||
memmgrDumpFbRegions(pGpu, pMemoryManager);
|
||||
|
||||
@ -1978,6 +1965,7 @@ memmgrSetPartitionableMem_IMPL
|
||||
{
|
||||
PMA_REGION_DESCRIPTOR *pFirstPmaRegionDesc = NULL;
|
||||
NvU32 numPmaRegions;
|
||||
NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
|
||||
|
||||
NV_ASSERT_OK_OR_RETURN(pmaGetRegionInfo(&pHeap->pmaObject,
|
||||
&numPmaRegions, &pFirstPmaRegionDesc));
|
||||
@ -1986,6 +1974,8 @@ memmgrSetPartitionableMem_IMPL
|
||||
pmaGetFreeMemory(&pHeap->pmaObject, &freeMem);
|
||||
pmaGetTotalMemory(&pHeap->pmaObject, &size);
|
||||
|
||||
NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig));
|
||||
|
||||
//
|
||||
// MIG won't be used alongside APM and hence the check below is of no use
|
||||
// Even if we enable the check for APM the check will fail given that after
|
||||
@ -1996,8 +1986,11 @@ memmgrSetPartitionableMem_IMPL
|
||||
// channels are required to be in CPR vidmem. This changes the calculation below
|
||||
// We can ignore this for the non-MIG case.
|
||||
//
|
||||
if (!gpuIsCCorApmFeatureEnabled(pGpu) ||
|
||||
IS_MIG_ENABLED(pGpu))
|
||||
// When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
|
||||
// and hence free memory can not be expected to be same as total memory.
|
||||
//
|
||||
if ((!gpuIsCCorApmFeatureEnabled(pGpu) || IS_MIG_ENABLED(pGpu)) &&
|
||||
!(pmaConfig & PMA_QUERY_NUMA_ONLINED))
|
||||
{
|
||||
//
|
||||
// PMA should be completely free at this point, otherwise we risk
|
||||
@ -2891,6 +2884,7 @@ memmgrPmaRegisterRegions_IMPL
|
||||
NvU32 blPageIndex;
|
||||
NvU32 blackListCount;
|
||||
NvU64 base, size;
|
||||
NvU64 pmaTotalMemorySize = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
const MEMORY_SYSTEM_STATIC_CONFIG *pMemsysConfig =
|
||||
kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
|
||||
@ -2983,6 +2977,7 @@ memmgrPmaRegisterRegions_IMPL
|
||||
}
|
||||
}
|
||||
|
||||
pmaTotalMemorySize += (pmaRegion.limit - pmaRegion.base + 1);
|
||||
NV_PRINTF(LEVEL_INFO,
|
||||
"Register FB region %llx..%llx of size %llx with PMA\n",
|
||||
pmaRegion.base, pmaRegion.limit,
|
||||
@ -3008,6 +3003,18 @@ memmgrPmaRegisterRegions_IMPL
|
||||
pmaRegionIdx++;
|
||||
}
|
||||
|
||||
if (gpuIsSelfHosted(pGpu) && osNumaOnliningEnabled(pGpu->pOsGpuInfo))
|
||||
{
|
||||
//
|
||||
// NUMA onlined memory size should not exceed memory size assigned to PMA.
|
||||
// TODO : Currently in selfhosted and P9+GV100 systems numaOnlined size is less
|
||||
// than PMA Memory Size. Ideally both of them should be identical. Bug 4051320.
|
||||
//
|
||||
NvU64 numaTotalSize = 0;
|
||||
NvU64 numaFreeSize = 0;
|
||||
osGetNumaMemoryUsage(pPma->numaNodeId, &numaFreeSize, &numaTotalSize);
|
||||
NV_ASSERT_OR_RETURN(pmaTotalMemorySize >= numaTotalSize, NV_ERR_INVALID_STATE);
|
||||
}
|
||||
//
|
||||
// bug #200354346, make sure the RM reserved region(s) are
|
||||
// scrubbed during the region creation itself. Top Down scrubber,
|
||||
|
@ -69,11 +69,13 @@ static NvU32 _scrubMemory(OBJMEMSCRUB *pScrubber, RmPhysAddr base, NvU64 size,
|
||||
NvU32 dstCpuCacheAttrib, NvU32 freeToken);
|
||||
static void _scrubWaitAndSave(OBJMEMSCRUB *pScrubber, PSCRUB_NODE pList, NvLength itemsToSave);
|
||||
static NvU64 _scrubGetFreeEntries(OBJMEMSCRUB *pScrubber);
|
||||
static NvU64 _scrubCheckAndSubmit(OBJMEMSCRUB *pScrubber, NvU64 chunkSize, NvU64 *pPages,
|
||||
NvU64 pageCount, PSCRUB_NODE pList, NvLength pagesToScrubCheck);
|
||||
static NvU64 _scrubCheckAndSubmit(OBJMEMSCRUB *pScrubber, NvU64 pageCount, PSCRUB_NODE pList,
|
||||
PSCRUB_NODE pScrubListCopy, NvLength pagesToScrubCheck);
|
||||
static void _scrubCopyListItems(OBJMEMSCRUB *pScrubber, PSCRUB_NODE pList, NvLength itemsToSave);
|
||||
|
||||
static NV_STATUS _scrubCheckLocked(OBJMEMSCRUB *pScrubber, PSCRUB_NODE *ppList, NvU64 *pSize);
|
||||
static NV_STATUS _scrubCombinePages(NvU64 *pPages, NvU64 pageSize, NvU64 pageCount,
|
||||
PSCRUB_NODE *ppScrubList, NvU64 *pSize);
|
||||
|
||||
/**
|
||||
* Constructs the memory scrubber object and signals
|
||||
@ -403,63 +405,78 @@ scrubSubmitPages
|
||||
{
|
||||
NvU64 curPagesSaved = 0;
|
||||
PSCRUB_NODE pScrubList = NULL;
|
||||
PSCRUB_NODE pScrubListCopy = NULL;
|
||||
NvU64 scrubListSize = 0;
|
||||
NvLength pagesToScrubCheck = 0;
|
||||
NvU64 totalSubmitted = 0;
|
||||
NvU64 numFinished = 0;
|
||||
NvU64 freeEntriesInList = 0;
|
||||
NvU64 scrubCount = 0;
|
||||
NvU64 numPagesToScrub = pageCount;
|
||||
NvU64 numPagesToScrub = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
portSyncMutexAcquire(pScrubber->pScrubberMutex);
|
||||
*pSize = 0;
|
||||
*ppList = pScrubList;
|
||||
|
||||
NV_CHECK_OR_GOTO(LEVEL_INFO, pageCount > 0, cleanup);
|
||||
|
||||
NV_PRINTF(LEVEL_INFO, "submitting pages, pageCount = 0x%llx chunkSize = 0x%llx\n", pageCount, chunkSize);
|
||||
|
||||
freeEntriesInList = _scrubGetFreeEntries(pScrubber);
|
||||
if (freeEntriesInList < pageCount)
|
||||
{
|
||||
pScrubList = (PSCRUB_NODE)
|
||||
portMemAllocNonPaged((NvLength)(sizeof(SCRUB_NODE) * (pageCount - freeEntriesInList)));
|
||||
|
||||
if (pScrubList == NULL)
|
||||
NV_ASSERT_OK_OR_GOTO(status,
|
||||
_scrubCombinePages(pPages,
|
||||
chunkSize,
|
||||
pageCount,
|
||||
&pScrubList,
|
||||
&scrubListSize),
|
||||
cleanup);
|
||||
|
||||
numPagesToScrub = scrubListSize;
|
||||
|
||||
if (freeEntriesInList < scrubListSize)
|
||||
{
|
||||
pScrubListCopy = (PSCRUB_NODE)
|
||||
portMemAllocNonPaged((NvLength)(sizeof(SCRUB_NODE) * (scrubListSize - freeEntriesInList)));
|
||||
|
||||
if (pScrubListCopy == NULL)
|
||||
{
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
while (freeEntriesInList < pageCount)
|
||||
while (freeEntriesInList < scrubListSize)
|
||||
{
|
||||
if (pageCount > MAX_SCRUB_ITEMS)
|
||||
if (scrubListSize > MAX_SCRUB_ITEMS)
|
||||
{
|
||||
pagesToScrubCheck = (NvLength)(MAX_SCRUB_ITEMS - freeEntriesInList);
|
||||
scrubCount = MAX_SCRUB_ITEMS;
|
||||
}
|
||||
else
|
||||
{
|
||||
pagesToScrubCheck = (NvLength)(pageCount - freeEntriesInList);
|
||||
scrubCount = pageCount;
|
||||
pagesToScrubCheck = (NvLength)(scrubListSize - freeEntriesInList);
|
||||
scrubCount = scrubListSize;
|
||||
}
|
||||
|
||||
numFinished = _scrubCheckAndSubmit(pScrubber, chunkSize, &pPages[totalSubmitted],
|
||||
scrubCount, &pScrubList[curPagesSaved],
|
||||
numFinished = _scrubCheckAndSubmit(pScrubber, scrubCount,
|
||||
&pScrubList[totalSubmitted],
|
||||
&pScrubListCopy[curPagesSaved],
|
||||
pagesToScrubCheck);
|
||||
|
||||
pageCount -= numFinished;
|
||||
scrubListSize -= numFinished;
|
||||
curPagesSaved += pagesToScrubCheck;
|
||||
totalSubmitted += numFinished;
|
||||
freeEntriesInList = _scrubGetFreeEntries(pScrubber);
|
||||
}
|
||||
|
||||
*ppList = pScrubList;
|
||||
*ppList = pScrubListCopy;
|
||||
*pSize = curPagesSaved;
|
||||
}
|
||||
else
|
||||
{
|
||||
totalSubmitted = _scrubCheckAndSubmit(pScrubber, chunkSize, pPages,
|
||||
pageCount, NULL,
|
||||
0);
|
||||
totalSubmitted = _scrubCheckAndSubmit(pScrubber, scrubListSize,
|
||||
pScrubList, NULL, 0);
|
||||
*ppList = NULL;
|
||||
*pSize = 0;
|
||||
}
|
||||
@ -467,6 +484,12 @@ scrubSubmitPages
|
||||
cleanup:
|
||||
portSyncMutexRelease(pScrubber->pScrubberMutex);
|
||||
|
||||
if (pScrubList != NULL)
|
||||
{
|
||||
portMemFree(pScrubList);
|
||||
pScrubList = NULL;
|
||||
}
|
||||
|
||||
NV_CHECK_OK_OR_RETURN(LEVEL_INFO, status);
|
||||
|
||||
if (totalSubmitted == numPagesToScrub)
|
||||
@ -507,15 +530,33 @@ scrubWaitPages
|
||||
)
|
||||
{
|
||||
|
||||
NvU32 iter = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 iter = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
PSCRUB_NODE pScrubList = NULL;
|
||||
NvU64 scrubListSize = 0;
|
||||
|
||||
NV_ASSERT_OK_OR_RETURN(_scrubCombinePages(pPages,
|
||||
chunkSize,
|
||||
pageCount,
|
||||
&pScrubList,
|
||||
&scrubListSize));
|
||||
|
||||
portSyncMutexAcquire(pScrubber->pScrubberMutex);
|
||||
for (iter = 0; iter < pageCount; iter++)
|
||||
|
||||
for (iter = 0; iter < scrubListSize; iter++)
|
||||
{
|
||||
_waitForPayload(pScrubber, pPages[iter], (pPages[iter] + chunkSize - 1));
|
||||
_waitForPayload(pScrubber,
|
||||
pScrubList[iter].base,
|
||||
(pScrubList[iter].base + pScrubList[iter].size - 1));
|
||||
}
|
||||
portSyncMutexRelease(pScrubber->pScrubberMutex);
|
||||
|
||||
if (pScrubList != NULL)
|
||||
{
|
||||
portMemFree(pScrubList);
|
||||
pScrubList = NULL;
|
||||
}
|
||||
|
||||
return status;
|
||||
|
||||
}
|
||||
@ -644,29 +685,28 @@ _scrubCopyListItems
|
||||
/* This function is used to check and submit work items always within the
|
||||
* available / maximum scrub list size.
|
||||
*
|
||||
* @param[in] pScrubber OBJMEMSCRUB pointer
|
||||
* @param[in] chunkSize size of each page
|
||||
* @param[in] pPages Array of base address
|
||||
* @param[in] pageCount number of pages in the array
|
||||
* @param[in] pList pointer will store the return check array
|
||||
* @param[in] pScrubber OBJMEMSCRUB pointer
|
||||
* @param[in] pageCount number of pages in the array
|
||||
* @param[in] pList pointer will store the return check array
|
||||
* @param[in] pScrubListCopy List where pages are saved
|
||||
* @param[in] pagesToScrubCheck How many pages will need to be saved
|
||||
* @returns the number of work successfully submitted, else 0
|
||||
*/
|
||||
static NvU64
|
||||
_scrubCheckAndSubmit
|
||||
(
|
||||
OBJMEMSCRUB *pScrubber,
|
||||
NvU64 chunkSize,
|
||||
NvU64 *pPages,
|
||||
NvU64 pageCount,
|
||||
PSCRUB_NODE pList,
|
||||
PSCRUB_NODE pScrubListCopy,
|
||||
NvLength pagesToScrubCheck
|
||||
)
|
||||
{
|
||||
NvU64 iter = 0;
|
||||
NvU64 newId;
|
||||
NV_STATUS status;
|
||||
NvU64 iter = 0;
|
||||
NvU64 newId;
|
||||
NV_STATUS status;
|
||||
|
||||
if (pList == NULL && pagesToScrubCheck != 0)
|
||||
if (pScrubListCopy == NULL && pagesToScrubCheck != 0)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR,
|
||||
"pages need to be saved off, but stash list is invalid\n");
|
||||
@ -681,19 +721,19 @@ _scrubCheckAndSubmit
|
||||
|
||||
NV_PRINTF(LEVEL_INFO,
|
||||
"Submitting work, Id: %llx, base: %llx, size: %llx\n",
|
||||
newId, pPages[iter], chunkSize);
|
||||
newId, pList[iter].base, pList[iter].size);
|
||||
|
||||
{
|
||||
status =_scrubMemory(pScrubber, pPages[iter], chunkSize, NV_MEMORY_DEFAULT,
|
||||
status =_scrubMemory(pScrubber, pList[iter].base, pList[iter].size, NV_MEMORY_DEFAULT,
|
||||
(NvU32)newId);
|
||||
}
|
||||
|
||||
if(status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Failing because the work dint submit.\n");
|
||||
NV_PRINTF(LEVEL_ERROR, "Failing because the work didn't submit.\n");
|
||||
goto exit;
|
||||
}
|
||||
_scrubAddWorkToList(pScrubber, pPages[iter], chunkSize, newId);
|
||||
_scrubAddWorkToList(pScrubber, pList[iter].base, pList[iter].size, newId);
|
||||
_scrubCheckProgress(pScrubber);
|
||||
}
|
||||
|
||||
@ -897,7 +937,7 @@ _scrubCheckProgress
|
||||
else
|
||||
lastSWSemaphoreDone = ceutilsUpdateProgress(pScrubber->pCeUtils);
|
||||
}
|
||||
|
||||
|
||||
pScrubber->lastSWSemaphoreDone = lastSWSemaphoreDone;
|
||||
|
||||
return lastSWSemaphoreDone;
|
||||
@ -949,3 +989,42 @@ cleanup:
|
||||
memdescDestroy(pMemDesc);
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS
|
||||
_scrubCombinePages
|
||||
(
|
||||
NvU64 *pPages,
|
||||
NvU64 pageSize,
|
||||
NvU64 pageCount,
|
||||
PSCRUB_NODE *ppScrubList,
|
||||
NvU64 *pSize
|
||||
)
|
||||
{
|
||||
NvU64 i, j;
|
||||
|
||||
*ppScrubList = (PSCRUB_NODE)portMemAllocNonPaged(sizeof(SCRUB_NODE) * pageCount);
|
||||
NV_ASSERT_OR_RETURN(*ppScrubList != NULL, NV_ERR_NO_MEMORY);
|
||||
|
||||
// Copy first element from original list to new list
|
||||
(*ppScrubList)[0].base = pPages[0];
|
||||
(*ppScrubList)[0].size = pageSize;
|
||||
|
||||
for (i = 0, j = 0; i < (pageCount - 1); i++)
|
||||
{
|
||||
if ((((*ppScrubList)[j].size + pageSize) > SCRUB_MAX_BYTES_PER_LINE) ||
|
||||
((pPages[i] + pageSize) != pPages[i+1]))
|
||||
{
|
||||
j++;
|
||||
(*ppScrubList)[j].base = pPages[i+1];
|
||||
(*ppScrubList)[j].size = pageSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
(*ppScrubList)[j].size += pageSize;
|
||||
}
|
||||
}
|
||||
|
||||
*pSize = j + 1;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -363,7 +363,7 @@ static NV_STATUS _pmaNumaAllocatePages
|
||||
osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
|
||||
}
|
||||
|
||||
if (bScrubOnAlloc)
|
||||
if (bScrubOnAlloc && (i > 0))
|
||||
{
|
||||
PSCRUB_NODE pPmaScrubList = NULL;
|
||||
NvU64 count;
|
||||
|
@ -1618,6 +1618,24 @@ pmaGetFreeMemory
|
||||
NvU64 *pBytesFree
|
||||
)
|
||||
{
|
||||
#if !defined(SRT_BUILD)
|
||||
NvU64 val;
|
||||
|
||||
portSyncSpinlockAcquire(pPma->pPmaLock);
|
||||
NvBool nodeOnlined = pPma->nodeOnlined;
|
||||
portSyncSpinlockRelease(pPma->pPmaLock);
|
||||
|
||||
if (nodeOnlined)
|
||||
{
|
||||
osGetNumaMemoryUsage(pPma->numaNodeId, pBytesFree, &val);
|
||||
return;
|
||||
}
|
||||
//
|
||||
// what to return when bNUMA == NV_TRUE and nodeOnlined==NV_FALSE?
|
||||
// TODO : BUG 4199482.
|
||||
//
|
||||
#endif
|
||||
|
||||
portSyncSpinlockAcquire(pPma->pPmaLock);
|
||||
|
||||
*pBytesFree = pPma->pmaStats.numFreeFrames << PMA_PAGE_SHIFT;
|
||||
@ -1638,6 +1656,24 @@ pmaGetTotalMemory
|
||||
|
||||
*pBytesTotal = 0;
|
||||
|
||||
#if !defined(SRT_BUILD)
|
||||
NvU64 val;
|
||||
|
||||
portSyncSpinlockAcquire(pPma->pPmaLock);
|
||||
NvBool nodeOnlined = pPma->nodeOnlined;
|
||||
portSyncSpinlockRelease(pPma->pPmaLock);
|
||||
|
||||
if (nodeOnlined)
|
||||
{
|
||||
osGetNumaMemoryUsage(pPma->numaNodeId, &val, pBytesTotal);
|
||||
return;
|
||||
}
|
||||
//
|
||||
// what to return when bNUMA == NV_TRUE and nodeOnlined==NV_FALSE?
|
||||
// TODO : BUG 4199482.
|
||||
//
|
||||
#endif
|
||||
|
||||
for (i = 0; i < pPma->regSize; i++)
|
||||
{
|
||||
pMap = pPma->pRegions[i];
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -23,15 +23,24 @@
|
||||
|
||||
#include "core/core.h"
|
||||
#include "gpu/gpu.h"
|
||||
#include "nvtypes.h"
|
||||
#include "os/os.h"
|
||||
#include "kernel/gpu/mem_sys/kern_mem_sys.h"
|
||||
#include "gpu/mem_mgr/mem_desc.h"
|
||||
#include "gpu/bus/kern_bus.h"
|
||||
#include "kernel/gpu/intr/intr.h"
|
||||
#include "nverror.h"
|
||||
|
||||
#include "published/hopper/gh100/dev_fb.h"
|
||||
#include "published/hopper/gh100/dev_ltc.h"
|
||||
#include "published/hopper/gh100/dev_fbpa.h"
|
||||
#include "published/hopper/gh100/dev_vm.h"
|
||||
#include "published/hopper/gh100/pri_nv_xal_ep.h"
|
||||
#include "published/hopper/gh100/dev_nv_xal_addendum.h"
|
||||
#include "published/hopper/gh100/dev_nv_xpl.h"
|
||||
#include "published/hopper/gh100/dev_xtl_ep_pri.h"
|
||||
#include "published/hopper/gh100/hwproject.h"
|
||||
#include "published/ampere/ga100/dev_fb.h"
|
||||
|
||||
NV_STATUS
|
||||
kmemsysDoCacheOp_GH100
|
||||
@ -566,3 +575,168 @@ kmemsysSwizzIdToVmmuSegmentsRange_GH100
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
/*!
|
||||
* Utility function used to read registers and ignore PRI errors
|
||||
*/
|
||||
static NvU32
|
||||
_kmemsysReadRegAndMaskPriError
|
||||
(
|
||||
OBJGPU *pGpu,
|
||||
NvU32 regAddr
|
||||
)
|
||||
{
|
||||
NvU32 regVal;
|
||||
|
||||
regVal = osGpuReadReg032(pGpu, regAddr);
|
||||
if ((regVal & GPU_READ_PRI_ERROR_MASK) == GPU_READ_PRI_ERROR_CODE)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return regVal;
|
||||
}
|
||||
/*
|
||||
* @brief Function that checks if ECC error occurred by reading various count
|
||||
* registers/interrupt registers. This function is not floorsweeping-aware so
|
||||
* PRI errors are ignored
|
||||
*/
|
||||
void
|
||||
kmemsysCheckEccCounts_GH100
|
||||
(
|
||||
OBJGPU *pGpu,
|
||||
KernelMemorySystem *pKernelMemorySystem
|
||||
)
|
||||
{
|
||||
NvU32 dramCount = 0;
|
||||
NvU32 mmuCount = 0;
|
||||
NvU32 ltcCount = 0;
|
||||
NvU32 pcieCount = 0;
|
||||
NvU32 regVal;
|
||||
for (NvU32 i = 0; i < NV_SCAL_LITTER_NUM_FBPAS; i++)
|
||||
{
|
||||
for (NvU32 j = 0; j < NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1; j++)
|
||||
{
|
||||
// DRAM count read
|
||||
dramCount += _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_FBPA_0_ECC_DED_COUNT(j) + (i * NV_FBPA_PRI_STRIDE));
|
||||
|
||||
// LTC count read
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT +
|
||||
(i * NV_LTC_PRI_STRIDE) + (j * NV_LTS_PRI_STRIDE));
|
||||
ltcCount += DRF_VAL(_PLTCG_LTC0_LTS0, _L2_CACHE_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
}
|
||||
}
|
||||
|
||||
// L2TLB
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT);
|
||||
mmuCount += DRF_VAL(_PFB_PRI_MMU, _L2TLB_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
|
||||
// HUBTLB
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT);
|
||||
mmuCount += DRF_VAL(_PFB_PRI_MMU, _HUBTLB_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
|
||||
// FILLUNIT
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT);
|
||||
mmuCount += DRF_VAL(_PFB_PRI_MMU, _FILLUNIT_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
|
||||
// PCIE RBUF
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_COUNT_RBUF);
|
||||
pcieCount += DRF_VAL(_XPL_DL, _ERR_COUNT_RBUF, _UNCORR_ERR, regVal);
|
||||
|
||||
// PCIE SEQ_LUT
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_COUNT_SEQ_LUT);
|
||||
pcieCount += DRF_VAL(_XPL_DL, _ERR_COUNT_SEQ_LUT, _UNCORR_ERR, regVal);
|
||||
|
||||
// PCIE RE ORDER
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT);
|
||||
pcieCount += DRF_VAL(_XAL_EP, _REORDER_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
|
||||
// PCIE P2PREQ
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT);
|
||||
pcieCount += DRF_VAL(_XAL_EP, _P2PREQ_ECC, _UNCORRECTED_ERR_COUNT_UNIQUE, regVal);
|
||||
|
||||
// PCIE XTL
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_DED_ERROR_STATUS);
|
||||
if (regVal != 0)
|
||||
{
|
||||
pcieCount += 1;
|
||||
}
|
||||
|
||||
// PCIE XTL
|
||||
regVal = _kmemsysReadRegAndMaskPriError(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS);
|
||||
if (regVal != 0)
|
||||
{
|
||||
pcieCount += 1;
|
||||
}
|
||||
|
||||
// If counts > 0 or if poison interrupt pending, ECC error has occurred.
|
||||
if (((dramCount + ltcCount + mmuCount + pcieCount) != 0) ||
|
||||
intrIsVectorPending_HAL(pGpu, GPU_GET_INTR(pGpu), NV_PFB_FBHUB_POISON_INTR_VECTOR_HW_INIT, NULL))
|
||||
{
|
||||
nvErrorLog_va((void *)pGpu, UNRECOVERABLE_ECC_ERROR_ESCAPE,
|
||||
"An uncorrectable ECC error detected "
|
||||
"(possible firmware handling failure) "
|
||||
"DRAM:%d, LTC:%d, MMU:%d, PCIE:%d", dramCount, ltcCount, mmuCount, pcieCount);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Function that clears ECC error count registers.
|
||||
*/
|
||||
NV_STATUS
|
||||
kmemsysClearEccCounts_GH100
|
||||
(
|
||||
OBJGPU *pGpu,
|
||||
KernelMemorySystem *pKernelMemorySystem
|
||||
)
|
||||
{
|
||||
NvU32 regVal = 0;
|
||||
RMTIMEOUT timeout;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
gpuClearFbhubPoisonIntrForBug2924523_HAL(pGpu);
|
||||
|
||||
for (NvU32 i = 0; i < NV_SCAL_LITTER_NUM_FBPAS; i++)
|
||||
{
|
||||
for (NvU32 j = 0; j < NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1; j++)
|
||||
{
|
||||
osGpuWriteReg032(pGpu, NV_PFB_FBPA_0_ECC_DED_COUNT(j) + (i * NV_FBPA_PRI_STRIDE), 0);
|
||||
osGpuWriteReg032(pGpu, NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT + (i * NV_LTC_PRI_STRIDE) + (j * NV_LTS_PRI_STRIDE), 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Reset MMU counts
|
||||
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT, 0);
|
||||
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT, 0);
|
||||
osGpuWriteReg032(pGpu, NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT, 0);
|
||||
|
||||
// Reset XAL-EP counts
|
||||
osGpuWriteReg032(pGpu, NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT, 0);
|
||||
osGpuWriteReg032(pGpu, NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT, 0);
|
||||
|
||||
// Reset XTL-EP status registers
|
||||
osGpuWriteReg032(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_DED_ERROR_STATUS, ~0);
|
||||
osGpuWriteReg032(pGpu, NV_XTL_BASE_ADDRESS + NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS, ~0);
|
||||
|
||||
// Reset XPL-EP error counters
|
||||
regVal = DRF_DEF(_XPL, _DL_ERR_RESET, _RBUF_UNCORR_ERR_COUNT, _PENDING) |
|
||||
DRF_DEF(_XPL, _DL_ERR_RESET, _SEQ_LUT_UNCORR_ERR_COUNT, _PENDING);
|
||||
osGpuWriteReg032(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_RESET, regVal);
|
||||
|
||||
// Wait for the error counter reset to complete
|
||||
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
|
||||
for (;;)
|
||||
{
|
||||
status = gpuCheckTimeout(pGpu, &timeout);
|
||||
|
||||
regVal = osGpuReadReg032(pGpu, NV_XPL_BASE_ADDRESS + NV_XPL_DL_ERR_RESET);
|
||||
|
||||
if (FLD_TEST_DRF(_XPL, _DL_ERR_RESET, _RBUF_UNCORR_ERR_COUNT, _DONE, regVal) &&
|
||||
FLD_TEST_DRF(_XPL, _DL_ERR_RESET, _SEQ_LUT_UNCORR_ERR_COUNT, _DONE, regVal))
|
||||
break;
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -5800,6 +5800,7 @@ kmigmgrInitGPUInstanceBufPools_IMPL
|
||||
{
|
||||
Heap *pHeap;
|
||||
MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
|
||||
NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED;
|
||||
NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
|
||||
pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap;
|
||||
NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE);
|
||||
@ -5815,7 +5816,12 @@ kmigmgrInitGPUInstanceBufPools_IMPL
|
||||
// This is just a sanity check to make sure this assumption is correct and
|
||||
// allocation from PMA cannot trigger UVM evictions.
|
||||
//
|
||||
if (memmgrIsPmaInitialized(pMemoryManager))
|
||||
// When FB memory is onlined as NUMA node, kernel can directly alloc FB memory
|
||||
// and hence free memory can not be expected to be same as total memory.
|
||||
//
|
||||
if (memmgrIsPmaInitialized(pMemoryManager) &&
|
||||
(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig) == NV_OK) &&
|
||||
!(pmaConfig & PMA_QUERY_NUMA_ONLINED))
|
||||
{
|
||||
NvU64 freeSpace, totalSpace;
|
||||
pmaGetFreeMemory(&pHeap->pmaObject, &freeSpace);
|
||||
|
@ -93,6 +93,7 @@ _memoryfabricValidatePhysMem
|
||||
MEMORY_DESCRIPTOR *pPhysMemDesc;
|
||||
NvU64 physPageSize;
|
||||
NV_STATUS status;
|
||||
Memory *pMemory;
|
||||
|
||||
if (hPhysMem == 0)
|
||||
{
|
||||
@ -110,7 +111,19 @@ _memoryfabricValidatePhysMem
|
||||
return status;
|
||||
}
|
||||
|
||||
pPhysMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;
|
||||
pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
|
||||
if (pMemory == NULL)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
|
||||
return NV_ERR_INVALID_OBJECT_HANDLE;
|
||||
}
|
||||
|
||||
pPhysMemDesc = pMemory->pMemDesc;
|
||||
if (pPhysMemDesc == NULL)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
|
||||
return NV_ERR_INVALID_OBJECT_HANDLE;
|
||||
}
|
||||
|
||||
if ((pOwnerGpu != pPhysMemDesc->pGpu) ||
|
||||
!memmgrIsApertureSupportedByFla_HAL(pOwnerGpu, pMemoryManager,
|
||||
|
@ -218,7 +218,7 @@ _memMulticastFabricDescriptorDequeueWaitUnderLock
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
static NV_STATUS
|
||||
_memMulticastFabricGpuInfoAddUnderLock
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
@ -1027,8 +1027,8 @@ memorymulticastfabricConstruct_IMPL
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlAttachGpu_IMPL
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricCtrlAttachGpu
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
|
||||
@ -1041,14 +1041,13 @@ memorymulticastfabricCtrlAttachGpu_IMPL
|
||||
OBJGPU *pGpu;
|
||||
FABRIC_VASPACE *pFabricVAS;
|
||||
NvU64 gpuProbeHandle;
|
||||
MEM_MULTICAST_FABRIC_GPU_INFO *pNode = \
|
||||
listTail(&pMulticastFabricDesc->gpuInfoList);
|
||||
MEM_MULTICAST_FABRIC_GPU_INFO *pNode = NULL;
|
||||
CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
|
||||
|
||||
if (pParams->flags != 0)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "flags passed for attach mem must be zero\n");
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto fail;
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Check if the Multicast FLA object has any additional slots for GPUs
|
||||
@ -1070,10 +1069,19 @@ memorymulticastfabricCtrlAttachGpu_IMPL
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR,
|
||||
"Multicast attach not supported on Windows/CC/vGPU modes\n");
|
||||
status = NV_ERR_NOT_SUPPORTED;
|
||||
goto fail;
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
status = _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric,
|
||||
pCallContext->pControlParams);
|
||||
if (status != NV_OK)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Failed to populate GPU info\n");
|
||||
return status;
|
||||
}
|
||||
|
||||
pNode = listTail(&pMulticastFabricDesc->gpuInfoList);
|
||||
|
||||
status = gpuFabricProbeGetGpuFabricHandle(pGpu->pGpuFabricProbeInfoKernel,
|
||||
&gpuProbeHandle);
|
||||
if (status != NV_OK)
|
||||
@ -1119,6 +1127,26 @@ fail:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlAttachGpu_IMPL
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_ATTACH_GPU_PARAMS *pParams
|
||||
)
|
||||
{
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
status = _memorymulticastfabricCtrlAttachGpu(pMemoryMulticastFabric,
|
||||
pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static MEM_MULTICAST_FABRIC_GPU_INFO*
|
||||
_memorymulticastfabricGetAttchedGpuInfo
|
||||
(
|
||||
@ -1148,8 +1176,8 @@ _memorymulticastfabricGetAttchedGpuInfo
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlDetachMem_IMPL
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricCtrlDetachMem
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
|
||||
@ -1189,6 +1217,26 @@ memorymulticastfabricCtrlDetachMem_IMPL
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlDetachMem_IMPL
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_DETACH_MEM_PARAMS *pParams
|
||||
)
|
||||
{
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
status = _memorymulticastfabricCtrlDetachMem(pMemoryMulticastFabric,
|
||||
pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricValidatePhysMem
|
||||
(
|
||||
@ -1202,6 +1250,7 @@ _memorymulticastfabricValidatePhysMem
|
||||
MEMORY_DESCRIPTOR *pPhysMemDesc;
|
||||
NvU64 physPageSize;
|
||||
NV_STATUS status;
|
||||
Memory *pMemory;
|
||||
|
||||
status = serverutilGetResourceRef(RES_GET_CLIENT_HANDLE(pMemoryMulticastFabric),
|
||||
hPhysMem, &pPhysmemRef);
|
||||
@ -1213,7 +1262,19 @@ _memorymulticastfabricValidatePhysMem
|
||||
return status;
|
||||
}
|
||||
|
||||
pPhysMemDesc = (dynamicCast(pPhysmemRef->pResource, Memory))->pMemDesc;
|
||||
pMemory = dynamicCast(pPhysmemRef->pResource, Memory);
|
||||
if (pMemory == NULL)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
|
||||
return NV_ERR_INVALID_OBJECT_HANDLE;
|
||||
}
|
||||
|
||||
pPhysMemDesc = pMemory->pMemDesc;
|
||||
if (pPhysMemDesc == NULL)
|
||||
{
|
||||
NV_PRINTF(LEVEL_ERROR, "Invalid memory handle\n");
|
||||
return NV_ERR_INVALID_OBJECT_HANDLE;
|
||||
}
|
||||
|
||||
if (memdescGetAddressSpace(pPhysMemDesc) != ADDR_FBMEM ||
|
||||
(pAttachedGpu != pPhysMemDesc->pGpu))
|
||||
@ -1237,8 +1298,8 @@ _memorymulticastfabricValidatePhysMem
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlAttachMem_IMPL
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricCtrlAttachMem
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
|
||||
@ -1342,6 +1403,26 @@ freeDupedMem:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlAttachMem_IMPL
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_ATTACH_MEM_PARAMS *pParams
|
||||
)
|
||||
{
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
status = _memorymulticastfabricCtrlAttachMem(pMemoryMulticastFabric,
|
||||
pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void
|
||||
memorymulticastfabricDestruct_IMPL
|
||||
(
|
||||
@ -1393,8 +1474,8 @@ memorymulticastfabricCopyConstruct_IMPL
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlGetInfo_IMPL
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricCtrlGetInfo
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_GET_INFO_PARAMS *pParams
|
||||
@ -1413,6 +1494,26 @@ memorymulticastfabricCtrlGetInfo_IMPL
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlGetInfo_IMPL
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_GET_INFO_PARAMS *pParams
|
||||
)
|
||||
{
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
status = _memorymulticastfabricCtrlGetInfo(pMemoryMulticastFabric,
|
||||
pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricIsReady_IMPL
|
||||
(
|
||||
@ -1451,8 +1552,8 @@ memorymulticastfabricIsReady_IMPL
|
||||
return mcTeamStatus;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricCtrlRegisterEvent_IMPL
|
||||
static NV_STATUS
|
||||
_memorymulticastfabricCtrlRegisterEvent
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
|
||||
@ -1467,20 +1568,23 @@ memorymulticastfabricCtrlRegisterEvent_IMPL
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
memorymulticastfabricControl_Prologue_IMPL
|
||||
memorymulticastfabricCtrlRegisterEvent_IMPL
|
||||
(
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
CALL_CONTEXT *pCallContext,
|
||||
RS_RES_CONTROL_PARAMS_INTERNAL *pParams
|
||||
MemoryMulticastFabric *pMemoryMulticastFabric,
|
||||
NV00FD_CTRL_REGISTER_EVENT_PARAMS *pParams
|
||||
)
|
||||
{
|
||||
RmResource *pResource = staticCast(pMemoryMulticastFabric, RmResource);
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
// Other control calls, nothing to be validated.
|
||||
if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
|
||||
return rmresControl_Prologue_IMPL(pResource, pCallContext, pParams);
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
return _memMulticastFabricGpuInfoAddUnderLock(pMemoryMulticastFabric, pParams);
|
||||
status = _memorymulticastfabricCtrlRegisterEvent(pMemoryMulticastFabric,
|
||||
pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS
|
||||
@ -1491,7 +1595,6 @@ memorymulticastfabricControl_IMPL
|
||||
RS_RES_CONTROL_PARAMS_INTERNAL *pParams
|
||||
)
|
||||
{
|
||||
Fabric *pFabric = SYS_GET_FABRIC(SYS_GET_INSTANCE());
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (pParams->cmd != NV00FD_CTRL_CMD_ATTACH_GPU)
|
||||
@ -1522,14 +1625,13 @@ memorymulticastfabricControl_IMPL
|
||||
NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status);
|
||||
}
|
||||
|
||||
fabricMulticastFabricOpsMutexAcquire(pFabric);
|
||||
|
||||
status = resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
|
||||
pCallContext, pParams);
|
||||
|
||||
fabricMulticastFabricOpsMutexRelease(pFabric);
|
||||
|
||||
return status;
|
||||
//
|
||||
// Note: GPU lock(s) is required for some control calls. Thus, it is
|
||||
// incorrect to take the leaf lock here. resControl_IMPL() attempts to
|
||||
// acquire the GPU locks before it calls the control call body.
|
||||
//
|
||||
return resControl_IMPL(staticCast(pMemoryMulticastFabric, RsResource),
|
||||
pCallContext, pParams);
|
||||
}
|
||||
|
||||
NvBool
|
||||
|
@ -82,6 +82,8 @@ rmclientConstruct_IMPL
|
||||
pClient->pSecurityToken = NULL;
|
||||
pClient->pOSInfo = pSecInfo->clientOSInfo;
|
||||
|
||||
pClient->cachedPrivilege = pSecInfo->privLevel;
|
||||
|
||||
// TODO: Revisit in M2, see GPUSWSEC-1176
|
||||
if (RMCFG_FEATURE_PLATFORM_GSP && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
|
||||
{
|
||||
@ -96,10 +98,10 @@ rmclientConstruct_IMPL
|
||||
else
|
||||
{
|
||||
pClient->ProcID = osGetCurrentProcess();
|
||||
if (pClient->cachedPrivilege <= RS_PRIV_LEVEL_USER_ROOT)
|
||||
pClient->pOsPidInfo = osGetPidInfo();
|
||||
}
|
||||
|
||||
pClient->cachedPrivilege = pSecInfo->privLevel;
|
||||
|
||||
// Set user-friendly client name from current process
|
||||
osGetCurrentProcessName(pClient->name, NV_PROC_NAME_MAX_LENGTH);
|
||||
|
||||
@ -128,7 +130,7 @@ rmclientConstruct_IMPL
|
||||
{
|
||||
NV_PRINTF(LEVEL_WARNING,
|
||||
"NVRM_RPC: Failed to set host client resource handle range %x\n", status);
|
||||
return status;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -139,7 +141,7 @@ rmclientConstruct_IMPL
|
||||
{
|
||||
NV_PRINTF(LEVEL_WARNING,
|
||||
"Failed to set host client restricted resource handle range. Status=%x\n", status);
|
||||
return status;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!rmGpuLockIsOwner())
|
||||
@ -148,7 +150,7 @@ rmclientConstruct_IMPL
|
||||
if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_CLIENT)) != NV_OK)
|
||||
{
|
||||
NV_ASSERT(0);
|
||||
return status;
|
||||
goto out;
|
||||
}
|
||||
bReleaseLock = NV_TRUE;
|
||||
}
|
||||
@ -206,6 +208,13 @@ rmclientConstruct_IMPL
|
||||
if (status == NV_OK && pParams->pAllocParams != NULL)
|
||||
*(NvHandle*)(pParams->pAllocParams) = pParams->hClient;
|
||||
|
||||
out:
|
||||
if (status != NV_OK)
|
||||
{
|
||||
osPutPidInfo(pClient->pOsPidInfo);
|
||||
pClient->pOsPidInfo = NULL;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -230,6 +239,8 @@ rmclientDestruct_IMPL
|
||||
// Free any association of the client with existing third-party p2p object
|
||||
CliUnregisterFromThirdPartyP2P(pClient);
|
||||
|
||||
osPutPidInfo(pClient->pOsPidInfo);
|
||||
|
||||
//
|
||||
// Free all of the devices of the client (do it in reverse order to
|
||||
// facilitate tear down of things like ctxdmas, etc)
|
||||
|
@ -1013,7 +1013,7 @@ _rmapiControlWithSecInfoTlsIRQL
|
||||
NV_STATUS status;
|
||||
THREAD_STATE_NODE threadState;
|
||||
|
||||
NvU8 stackAllocator[TLS_ISR_ALLOCATOR_SIZE];
|
||||
NvU8 stackAllocator[2*TLS_ISR_ALLOCATOR_SIZE];
|
||||
PORT_MEM_ALLOCATOR* pIsrAllocator = portMemAllocatorCreateOnExistingBlock(stackAllocator, sizeof(stackAllocator));
|
||||
tlsIsrInit(pIsrAllocator);
|
||||
|
||||
|
@ -142,6 +142,7 @@ static CrashCatBufferDescriptor *_crashcatEngineCreateBufferDescriptor
|
||||
|
||||
portMemSet(pBufDesc, 0, sizeof(*pBufDesc));
|
||||
|
||||
pBufDesc->bRegistered = NV_FALSE;
|
||||
pBufDesc->aperture = aperture;
|
||||
pBufDesc->physOffset = offset;
|
||||
pBufDesc->size = size;
|
||||
@ -315,6 +316,8 @@ void *crashcatEngineMapCrashBuffer_IMPL
|
||||
//
|
||||
if (!pBufDesc->bRegistered)
|
||||
_crashcatEngineDestroyBufferDescriptor(pCrashCatEng, pBufDesc);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pBufDesc->pMapping;
|
||||
|
@ -69,7 +69,11 @@ void crashcatReportLogReporter_V1_GENERIC(CrashCatReport *pReport)
|
||||
NvCrashCatNvriscvUcodeId ucodeId = crashcatReportV1ReporterUcodeId(pReportV1);
|
||||
NV_CRASHCAT_RISCV_MODE riscvMode = crashcatReportV1ReporterMode(pReportV1);
|
||||
|
||||
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
|
||||
//
|
||||
// Though this is technically not a separate packet, we use the CRASHCAT_REPORT_LOG_PACKET_TYPE
|
||||
// macro to get the correct prefix/indentation for the reporter information.
|
||||
//
|
||||
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport,
|
||||
"Reported by partition:%u ucode:%u [%c-mode] version:%u @ %u",
|
||||
partition, ucodeId, crashcatReportModeToChar_GENERIC(riscvMode),
|
||||
crashcatReportV1ReporterVersion(pReportV1),
|
||||
|
@ -114,10 +114,13 @@ void crashcatReportLogReporter_V1_LIBOS2(CrashCatReport *pReport)
|
||||
NvCrashCatReport_V1 *pReportV1 = &pReport->v1.report;
|
||||
NvU8 taskId = crashcatReportV1ReporterLibos2TaskId(pReportV1);
|
||||
|
||||
//
|
||||
// Though this is technically not a separate packet, we use the CRASHCAT_REPORT_LOG_PACKET_TYPE
|
||||
// macro to get the correct prefix/indentation for the reporter information.
|
||||
//
|
||||
if (taskId == NV_CRASHCAT_REPORT_V1_REPORTER_ID_LIBOS2_TASK_ID_UNSPECIFIED)
|
||||
{
|
||||
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
|
||||
"Reported by libos kernel v%u.%u [%u] @ %u",
|
||||
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport, "Reported by libos kernel v%u.%u [%u] @ %u",
|
||||
crashcatReportV1ReporterVersionLibos2Major(pReportV1),
|
||||
crashcatReportV1ReporterVersionLibos2Minor(pReportV1),
|
||||
crashcatReportV1ReporterVersionLibos2Cl(pReportV1),
|
||||
@ -125,8 +128,7 @@ void crashcatReportLogReporter_V1_LIBOS2(CrashCatReport *pReport)
|
||||
}
|
||||
else
|
||||
{
|
||||
crashcatEnginePrintf(pReport->pEngine, NV_FALSE,
|
||||
"Reported by libos task:%u v%u.%u [%u] @ ts:%u",
|
||||
CRASHCAT_REPORT_LOG_PACKET_TYPE(pReport, "Reported by libos task:%u v%u.%u [%u] @ ts:%u",
|
||||
taskId, crashcatReportV1ReporterVersionLibos2Major(pReportV1),
|
||||
crashcatReportV1ReporterVersionLibos2Minor(pReportV1),
|
||||
crashcatReportV1ReporterVersionLibos2Cl(pReportV1),
|
||||
|
@ -223,9 +223,12 @@ mmuWalkFindLevel
|
||||
)
|
||||
{
|
||||
const MMU_WALK_LEVEL *pLevel = &pWalk->root;
|
||||
while (pLevel->pFmt != pLevelFmt)
|
||||
while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
|
||||
{
|
||||
NvU32 subLevel;
|
||||
|
||||
NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);
|
||||
|
||||
// Single sub-level always continues.
|
||||
if (1 == pLevel->pFmt->numSubLevels)
|
||||
{
|
||||
|
@ -1444,6 +1444,14 @@ _portMemAllocatorCreateOnExistingBlock
|
||||
pAllocator->pTracking = NULL; // No tracking for this allocator
|
||||
pAllocator->pImpl = (PORT_MEM_ALLOCATOR_IMPL*)(pAllocator + 1);
|
||||
|
||||
|
||||
//
|
||||
// PORT_MEM_BITVECTOR (pAllocator->pImpl) and PORT_MEM_ALLOCATOR_TRACKING (pAllocator->pImpl->tracking)
|
||||
// are mutually exclusively used.
|
||||
// When pAllocator->pTracking = NULL the data in pAllocator->pImpl->tracking is not used and instead
|
||||
// pBitVector uses the same memory location.
|
||||
// When pAllocator->pImpl->tracking is used, there is no usage of PORT_MEM_BITVECTOR.
|
||||
//
|
||||
pBitVector = (PORT_MEM_BITVECTOR*)(pAllocator->pImpl);
|
||||
pBitVector->pSpinlock = pSpinlock;
|
||||
|
||||
@ -1544,6 +1552,10 @@ _portMemAllocatorAllocExistingWrapper
|
||||
{
|
||||
portSyncSpinlockRelease(pSpinlock);
|
||||
}
|
||||
if (pMem == NULL)
|
||||
{
|
||||
PORT_MEM_PRINT_ERROR("Memory allocation failed.\n");
|
||||
}
|
||||
return pMem;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
NVIDIA_VERSION = 535.104.05
|
||||
NVIDIA_VERSION = 535.113.01
|
||||
|
||||
# This file.
|
||||
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
|
||||
|
Loading…
Reference in New Issue
Block a user